未验证 提交 7ff79c9d 编写于 作者: K Kaipeng Deng 提交者: GitHub

Merge branch 'develop' into fix_dataloader_memory_leak

要显示的变更太多。

To preserve performance only 1000 of 1000+ files are displayed.
| Github account | name | | Github account | name |
|---|---| |---|---|
| abhinavarora | Abhinav Arora | | abhinavarora | Abhinav Arora |
| andreazanetti | Andrea Zanetti |
| arlesniak | Artur Lesniak |
| arogowie-intel | Adam Osewski |
| backyes | Yan-Fei Wang | | backyes | Yan-Fei Wang |
| baiyfbupt | Yi-Fan Bai | | baiyfbupt | Yi-Fan Bai |
| beckett1124 | Bin Qi | | beckett1124 | Bin Qi |
...@@ -8,6 +11,7 @@ ...@@ -8,6 +11,7 @@
| chengxiaohua1105 | Xiao-Hua Cheng | | chengxiaohua1105 | Xiao-Hua Cheng |
| cxwangyi, yiwangbaidu, wangkuiyi | Yi Wang | | cxwangyi, yiwangbaidu, wangkuiyi | Yi Wang |
| cxysteven | Xing-Yi Cheng | | cxysteven | Xing-Yi Cheng |
| ddokupil | Dariusz Dokupil |
| dzhwinter | Zhi-Hong Dong | | dzhwinter | Zhi-Hong Dong |
| dragonwarrior | Long Wang | | dragonwarrior | Long Wang |
| dyning | Yuning Du | | dyning | Yuning Du |
...@@ -21,6 +25,7 @@ ...@@ -21,6 +25,7 @@
| hedaoyuan | Dao-Yuan He | | hedaoyuan | Dao-Yuan He |
| helinwang | He-Lin Wang | | helinwang | He-Lin Wang |
| jacquesqiao | Long-Fei Qiao | | jacquesqiao | Long-Fei Qiao |
| jakpiase | Jakub Piasecki |
| [jczaja](https://raw.githubusercontent.com/jczaja/Paddle/paddle-poland-team/doc/images/paddle_poland_team.jpg) | Jacek Czaja | | [jczaja](https://raw.githubusercontent.com/jczaja/Paddle/paddle-poland-team/doc/images/paddle_poland_team.jpg) | Jacek Czaja |
| JiayiFeng | Jia-Yi Feng | | JiayiFeng | Jia-Yi Feng |
| kbinias | Krzysztof Binias | | kbinias | Krzysztof Binias |
...@@ -42,6 +47,7 @@ ...@@ -42,6 +47,7 @@
| pakchoi | Chuan-Jiang Song | | pakchoi | Chuan-Jiang Song |
| panyx0718 | Xin Pan | | panyx0718 | Xin Pan |
| pengli09 | Peng Li | | pengli09 | Peng Li |
| pmajchrzak |Piotr Majchrzak |
| pkuyym | Ya-Ming Yang | | pkuyym | Ya-Ming Yang |
| pzelazko-intel | Pawel Zelazko | | pzelazko-intel | Pawel Zelazko |
| [pawelpiotrowicz](https://raw.githubusercontent.com/jczaja/Paddle/paddle-poland-team/doc/images/paddle_poland_team.jpg) | Pawel Piotrowicz | | [pawelpiotrowicz](https://raw.githubusercontent.com/jczaja/Paddle/paddle-poland-team/doc/images/paddle_poland_team.jpg) | Pawel Piotrowicz |
...@@ -72,3 +78,6 @@ ...@@ -72,3 +78,6 @@
| zhaopu7 | Pu Zhao | | zhaopu7 | Pu Zhao |
| zhouxiao-coder | Xiao Zhou | | zhouxiao-coder | Xiao Zhou |
| Zrachel | Rui-Qing Zhang | | Zrachel | Rui-Qing Zhang |
| jeng1220 | Bai-Cheng(Ryan) Jeng (NVIDIA) |
| mingxu1067 | Ming Huang (NVIDIA) |
| zlsh80826 | Reese Wang (NVIDIA) |
...@@ -12,14 +12,27 @@ ...@@ -12,14 +12,27 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License # limitations under the License
cmake_minimum_required(VERSION 3.10) if(APPLE AND WITH_ARM)
cmake_policy(VERSION 3.10) # cmake 3.19.2 version starts to support M1
cmake_minimum_required(VERSION 3.19.2)
cmake_policy(VERSION 3.19.2)
else(APPLE AND WITH_ARM)
cmake_minimum_required(VERSION 3.10)
cmake_policy(VERSION 3.10)
endif(APPLE AND WITH_ARM)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake") set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
include(system) include(system)
# Note(zhouwei): Ninja Generator will set CMAKE_BUILD_TYPE to Debug
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release" CACHE STRING
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
FORCE)
endif()
project(paddle CXX C) project(paddle CXX C)
# enable language CUDA # enable language CUDA
...@@ -66,6 +79,11 @@ if(WITH_MUSL) ...@@ -66,6 +79,11 @@ if(WITH_MUSL)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=deprecated-declarations -Wno-deprecated-declarations -Wno-error=pessimizing-move -Wno-error=deprecated-copy") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=deprecated-declarations -Wno-deprecated-declarations -Wno-error=pessimizing-move -Wno-error=deprecated-copy")
endif() endif()
if(APPLE AND WITH_ARM)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -target arm64-apple-darwin")
set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -target arm64-apple-darwin")
endif()
if(WITH_ASCEND_CL AND NOT WITH_ASCEND_CXX11) if(WITH_ASCEND_CL AND NOT WITH_ASCEND_CXX11)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0")
endif() endif()
...@@ -90,10 +108,6 @@ if(WIN32) ...@@ -90,10 +108,6 @@ if(WIN32)
if (MSVC_STATIC_CRT) if (MSVC_STATIC_CRT)
message(STATUS "Use static C runtime time, refer to https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-library-features?view=vs-2019") message(STATUS "Use static C runtime time, refer to https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-library-features?view=vs-2019")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /MTd")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /MT")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT")
foreach(flag_var foreach(flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO
...@@ -105,9 +119,7 @@ if(WIN32) ...@@ -105,9 +119,7 @@ if(WIN32)
endforeach(flag_var) endforeach(flag_var)
endif() endif()
# NOTE(zhouwei25): temporarily change MP to 1 for reducing CPU & memory utilization math(EXPR PROCESS_MAX "${CPU_CORES} * 2 / 3")
set(PROCESS_MAX 1)
#math(EXPR PROCESS_MAX "${CPU_CORES} * 1 / 2")
# windows build turn off warnings, use parallel compiling. # windows build turn off warnings, use parallel compiling.
foreach(flag_var foreach(flag_var
...@@ -116,7 +128,10 @@ if(WIN32) ...@@ -116,7 +128,10 @@ if(WIN32)
CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO)
string(REGEX REPLACE "/W[1-4]" " /W0 " ${flag_var} "${${flag_var}}") string(REGEX REPLACE "/W[1-4]" " /W0 " ${flag_var} "${${flag_var}}")
set(${flag_var} "${${flag_var}} /MP${PROCESS_MAX}") # NOTE(zhouwei25): GPU compile have too high memory utilization when parallel compiling
if(NOT WITH_GPU)
set(${flag_var} "${${flag_var}} /MP${PROCESS_MAX}")
endif()
endforeach(flag_var) endforeach(flag_var)
foreach(flag_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS) foreach(flag_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
set(${flag_var} "${${flag_var}} /w") set(${flag_var} "${${flag_var}} /w")
...@@ -208,16 +223,10 @@ option(WITH_STRIP "Strip so files of Whl packages" OFF) ...@@ -208,16 +223,10 @@ option(WITH_STRIP "Strip so files of Whl packages" OFF)
# PY_VERSION # PY_VERSION
if(NOT PY_VERSION) if(NOT PY_VERSION)
set(PY_VERSION 2.7) set(PY_VERSION 3.6)
endif() endif()
set(PYBIND11_PYTHON_VERSION ${PY_VERSION}) set(PYBIND11_PYTHON_VERSION ${PY_VERSION})
# CMAKE_BUILD_TYPE
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release" CACHE STRING
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
FORCE)
endif()
# the type of sanitizer, options are: Address, Leak, Memory, Thread, Undefined. Default: OFF # the type of sanitizer, options are: Address, Leak, Memory, Thread, Undefined. Default: OFF
if(SANITIZER_TYPE AND NOT "${SANITIZER_TYPE}" MATCHES "^(Address|Leak|Memory|Thread|Undefined)$") if(SANITIZER_TYPE AND NOT "${SANITIZER_TYPE}" MATCHES "^(Address|Leak|Memory|Thread|Undefined)$")
...@@ -282,6 +291,27 @@ if(WITH_GPU) ...@@ -282,6 +291,27 @@ if(WITH_GPU)
endif() endif()
endif() endif()
if(WITH_ROCM)
include(hip)
include(miopen) # set miopen libraries, must before configure
endif(WITH_ROCM)
if (NOT WITH_ROCM AND WITH_RCCL)
MESSAGE(WARNING
"Disable RCCL when compiling without ROCM. Force WITH_RCCL=OFF.")
set(WITH_RCCL OFF CACHE STRING
"Disable RCCL when compiling without ROCM" FORCE)
endif()
if(WITH_RCCL)
add_definitions("-DPADDLE_WITH_RCCL")
include(rccl)
else()
if(WITH_ROCM)
MESSAGE(WARNING "If the environment is multi-card, the WITH_RCCL option needs to be turned on, otherwise only a single card can be used.")
endif()
endif()
include(third_party) # download, build, install third_party, Contains about 20+ dependencies include(third_party) # download, build, install third_party, Contains about 20+ dependencies
include(flags) # set paddle compile flags include(flags) # set paddle compile flags
...@@ -306,26 +336,6 @@ include(configure) # add paddle env configuration ...@@ -306,26 +336,6 @@ include(configure) # add paddle env configuration
include_directories("${PADDLE_SOURCE_DIR}") include_directories("${PADDLE_SOURCE_DIR}")
if(WITH_ROCM)
include(hip)
endif(WITH_ROCM)
if (NOT WITH_ROCM AND WITH_RCCL)
MESSAGE(WARNING
"Disable RCCL when compiling without ROCM. Force WITH_RCCL=OFF.")
set(WITH_RCCL OFF CACHE STRING
"Disable RCCL when compiling without ROCM" FORCE)
endif()
if(WITH_RCCL)
add_definitions("-DPADDLE_WITH_RCCL")
include(rccl)
else()
if(WITH_ROCM)
MESSAGE(WARNING "If the environment is multi-card, the WITH_RCCL option needs to be turned on, otherwise only a single card can be used.")
endif()
endif()
if(WITH_NV_JETSON) if(WITH_NV_JETSON)
set(WITH_ARM ON CACHE STRING "Set WITH_ARM=ON when compiling WITH_NV_JETSON=ON." FORCE) set(WITH_ARM ON CACHE STRING "Set WITH_ARM=ON when compiling WITH_NV_JETSON=ON." FORCE)
endif() endif()
...@@ -333,8 +343,9 @@ endif() ...@@ -333,8 +343,9 @@ endif()
if(WITH_ARM) if(WITH_ARM)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
set(WITH_XBYAK OFF CACHE STRING "Disable XBYAK when compiling WITH_ARM=ON" FORCE) set(WITH_XBYAK OFF CACHE STRING "Disable XBYAK when compiling WITH_ARM=ON." FORCE)
set(WITH_MKL OFF CACHE STRING "Disable MKL when compiling WITH_ARM=ON." FORCE) set(WITH_MKL OFF CACHE STRING "Disable MKL when compiling WITH_ARM=ON." FORCE)
set(WITH_AVX OFF CACHE STRING "Disable AVX when compiling WITH_AVX=OFF." FORCE)
add_definitions(-DPADDLE_WITH_ARM) add_definitions(-DPADDLE_WITH_ARM)
endif() endif()
...@@ -352,6 +363,11 @@ if (WITH_MIPS) ...@@ -352,6 +363,11 @@ if (WITH_MIPS)
add_definitions(-DPADDLE_WITH_MIPS) add_definitions(-DPADDLE_WITH_MIPS)
endif() endif()
if (WITH_HETERPS)
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -faligned-new")
endif()
endif()
set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build") set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
......

<p align="center"> <p align="center">
<img align="center" src="doc/imgs/logo.png", width=1600> <img align="center" src="doc/imgs/logo.png", width=1600>
<p> <p>
...@@ -22,7 +21,7 @@ PaddlePaddle is originated from industrial practices with dedication and commitm ...@@ -22,7 +21,7 @@ PaddlePaddle is originated from industrial practices with dedication and commitm
## Installation ## Installation
### Latest PaddlePaddle Release: [v2.0](https://github.com/PaddlePaddle/Paddle/tree/release/2.0) ### Latest PaddlePaddle Release: [v2.1](https://github.com/PaddlePaddle/Paddle/tree/release/2.1)
Our vision is to enable deep learning for everyone via PaddlePaddle. Our vision is to enable deep learning for everyone via PaddlePaddle.
Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest features of PaddlePaddle. Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest features of PaddlePaddle.
...@@ -34,9 +33,9 @@ pip install paddlepaddle ...@@ -34,9 +33,9 @@ pip install paddlepaddle
pip install paddlepaddle-gpu pip install paddlepaddle-gpu
``` ```
More infomation about installation, please view [Quick Install](https://www.paddlepaddle.org.cn/install/quick) For more information about installation, please view [Quick Install](https://www.paddlepaddle.org.cn/install/quick)
Now our developers can acquire Tesla V100 online computing resources for free. If you create a program by AI Studio, you will obtain 10 hours to train models online per day. [Click here to start](https://aistudio.baidu.com/aistudio/index). Now our developers can acquire Tesla V100 online computing resources for free. If you create a program by AI Studio, you will obtain 8 hours to train models online per day. [Click here to start](https://aistudio.baidu.com/aistudio/index).
## FOUR LEADING TECHNOLOGIES ## FOUR LEADING TECHNOLOGIES
...@@ -47,14 +46,13 @@ Now our developers can acquire Tesla V100 online computing resources for free. I ...@@ -47,14 +46,13 @@ Now our developers can acquire Tesla V100 online computing resources for free. I
- **Support Ultra-Large-Scale Training of Deep Neural Networks** - **Support Ultra-Large-Scale Training of Deep Neural Networks**
PaddlePaddle has made breakthroughs in ultra-large-scale deep neural networks training. It launched the world's first large-scale open-source training platform that supports the training of deep networks with 100 billions of features and trillions of parameters using data sources distributed over hundreds of nodes. PaddlePaddle overcomes the online deep learning challenges for ultra-large-scale deep learning models, and further achieved the real-time model updating with more than 1 trillion parameters. PaddlePaddle has made breakthroughs in ultra-large-scale deep neural networks training. It launched the world's first large-scale open-source training platform that supports the training of deep networks with 100 billion features and trillions of parameters using data sources distributed over hundreds of nodes. PaddlePaddle overcomes the online deep learning challenges for ultra-large-scale deep learning models, and further achieved real-time model updating with more than 1 trillion parameters.
[Click here to learn more](https://github.com/PaddlePaddle/Fleet) [Click here to learn more](https://github.com/PaddlePaddle/Fleet)
- **Accelerated High-Performance Inference over Ubiquitous Deployments** - **High-Performance Inference Engines for Comprehensive Deployment Enviroments**
PaddlePaddle is not only compatible with other open-source frameworks for models training, but also works well on the ubiquitous developments, varying from platforms to devices. More specifically, PaddlePaddle accelerates the inference procedure with the fastest speed-up. Note that, a recent breakthrough of inference speed has been made by PaddlePaddle on Huawei's Kirin NPU, through the hardware/software co-optimization. PaddlePaddle is not only compatible with models trained in 3rd party open-source frameworks , but also offers complete inference products for various production scenarios. Our inference product line includes [Paddle Inference](https://paddle-inference.readthedocs.io/en/latest/product_introduction/summary.html): Native inference library for high-performance server and cloud inference; [Paddle Serving](https://github.com/PaddlePaddle/Serving): A service-oriented framework suitable for distributed and pipeline productions; [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite): Ultra-Lightweight inference engine for mobile and IoT environments; [Paddle.js](https://www.paddlepaddle.org.cn/paddle/paddlejs): A frontend inference engine for browser and mini-apps. Furthermore, by great amounts of optimization with leading hardware in each scenario, Paddle inference engines outperform most of the other mainstream frameworks.
[Click here to learn more](https://github.com/PaddlePaddle/Paddle-Lite)
- **Industry-Oriented Models and Libraries with Open Source Repositories** - **Industry-Oriented Models and Libraries with Open Source Repositories**
...@@ -87,8 +85,13 @@ We provide [English](https://www.paddlepaddle.org.cn/documentation/docs/en/guide ...@@ -87,8 +85,13 @@ We provide [English](https://www.paddlepaddle.org.cn/documentation/docs/en/guide
## Communication ## Communication
- [Github Issues](https://github.com/PaddlePaddle/Paddle/issues): bug reports, feature requests, install issues, usage issues, etc. - [Github Issues](https://github.com/PaddlePaddle/Paddle/issues): bug reports, feature requests, install issues, usage issues, etc.
- QQ discussion group: 778260830 (PaddlePaddle). - QQ discussion group: 793866180 (PaddlePaddle).
- [Forums](https://ai.baidu.com/forum/topic/list/168?pageNo=1): discuss implementations, research, etc. - [Forums](https://ai.baidu.com/forum/topic/list/168?pageNo=1): discuss implementations, research, etc.
## Courses
- [Server Deployments](https://aistudio.baidu.com/aistudio/course/introduce/19084): Courses intorducing high performance server deployments via local and remote services.
- [Edge Deployments](https://aistudio.baidu.com/aistudio/course/introduce/22690): Courses intorducing edge deployments from mobile, IoT to web and applets.
## Copyright and License ## Copyright and License
PaddlePaddle is provided under the [Apache-2.0 license](LICENSE). PaddlePaddle is provided under the [Apache-2.0 license](LICENSE).

<p align="center"> <p align="center">
<img align="center" src="doc/imgs/logo.png", width=1600> <img align="center" src="doc/imgs/logo.png", width=1600>
<p> <p>
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
## 安装 ## 安装
### PaddlePaddle最新版本: [v2.0](https://github.com/PaddlePaddle/Paddle/tree/release/2.0) ### PaddlePaddle最新版本: [v2.1](https://github.com/PaddlePaddle/Paddle/tree/release/2.1)
跟进PaddlePaddle最新特性请参考我们的[版本说明](https://github.com/PaddlePaddle/Paddle/releases) 跟进PaddlePaddle最新特性请参考我们的[版本说明](https://github.com/PaddlePaddle/Paddle/releases)
...@@ -32,7 +32,7 @@ pip install paddlepaddle-gpu ...@@ -32,7 +32,7 @@ pip install paddlepaddle-gpu
``` ```
更多安装信息详见官网 [安装说明](https://www.paddlepaddle.org.cn/install/quick) 更多安装信息详见官网 [安装说明](https://www.paddlepaddle.org.cn/install/quick)
PaddlePaddle用户可领取**免费Tesla V100在线算力资源**,训练模型更高效。**每日登陆即送10小时**[前往使用免费算力](https://aistudio.baidu.com/aistudio/index) PaddlePaddle用户可领取**免费Tesla V100在线算力资源**,训练模型更高效。**每日登陆即送8小时**[前往使用免费算力](https://aistudio.baidu.com/aistudio/index)
## 四大领先技术 ## 四大领先技术
...@@ -47,10 +47,9 @@ PaddlePaddle用户可领取**免费Tesla V100在线算力资源**,训练模型 ...@@ -47,10 +47,9 @@ PaddlePaddle用户可领取**免费Tesla V100在线算力资源**,训练模型
[查看详情](https://github.com/PaddlePaddle/Fleet) [查看详情](https://github.com/PaddlePaddle/Fleet)
- **多端多平台部署的高性能推理引擎** - **支持多端多平台的高性能推理部署工具**
飞桨不仅兼容其他开源框架训练的模型,还可以轻松地部署到不同架构的平台设备上。同时,飞桨的推理速度也是全面领先的。尤其经过了跟华为麒麟NPU的软硬一体优化,使得飞桨在NPU上的推理速度进一步突破。 飞桨不仅广泛兼容第三方开源框架训练的模型部署,并且为不同的场景的生产环境提供了完备的推理引擎,包括适用于高性能服务器及云端推理的原生推理库 [Paddle Inference](https://paddle-inference.readthedocs.io/en/latest/product_introduction/summary.html),面向分布式、流水线生产环境下自动上云、A/B测试等高阶功能的服务化推理框架 [Paddle Serving](https://github.com/PaddlePaddle/Serving),针对于移动端、物联网场景的轻量化推理引擎 [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite),以及在浏览器、小程序等环境下使用的前端推理引擎 [Paddle.js](https://www.paddlepaddle.org.cn/paddle/paddlejs)。同时,透过与不同场景下的主流硬件高度适配优化及异构计算的支持, 飞桨的推理性能也领先绝大部分的主流实现。
[查看详情](https://github.com/PaddlePaddle/Paddle-Lite)
- **面向产业应用,开源开放覆盖多领域的工业级模型库。** - **面向产业应用,开源开放覆盖多领域的工业级模型库。**
...@@ -83,8 +82,13 @@ PaddlePaddle用户可领取**免费Tesla V100在线算力资源**,训练模型 ...@@ -83,8 +82,13 @@ PaddlePaddle用户可领取**免费Tesla V100在线算力资源**,训练模型
## 交流与反馈 ## 交流与反馈
- 欢迎您通过[Github Issues](https://github.com/PaddlePaddle/Paddle/issues)来提交问题、报告与建议 - 欢迎您通过[Github Issues](https://github.com/PaddlePaddle/Paddle/issues)来提交问题、报告与建议
- QQ群: 778260830 (PaddlePaddle) - QQ群: 793866180 (PaddlePaddle)
- [论坛](https://ai.baidu.com/forum/topic/list/168): 欢迎大家在PaddlePaddle论坛分享在使用PaddlePaddle中遇到的问题和经验, 营造良好的论坛氛围 - [论坛](https://ai.baidu.com/forum/topic/list/168): 欢迎大家在PaddlePaddle论坛分享在使用PaddlePaddle中遇到的问题和经验, 营造良好的论坛氛围
## 课程
- [服务器部署](https://aistudio.baidu.com/aistudio/course/introduce/19084): 详细介绍高性能服务器端部署实操,包含本地端及服务化Serving部署等
- [端侧部署](https://aistudio.baidu.com/aistudio/course/introduce/22690): 详细介绍端侧多场景部署实操,从移端端设备、IoT、网页到小程序部署
## 版权和许可证 ## 版权和许可证
PaddlePaddle由[Apache-2.0 license](LICENSE)提供 PaddlePaddle由[Apache-2.0 license](LICENSE)提供
...@@ -69,15 +69,21 @@ if(NOT DEFINED CBLAS_PROVIDER) ...@@ -69,15 +69,21 @@ if(NOT DEFINED CBLAS_PROVIDER)
PATHS ${OPENBLAS_LIB_SEARCH_PATHS}) PATHS ${OPENBLAS_LIB_SEARCH_PATHS})
if(OPENBLAS_LAPACKE_INC_DIR AND OPENBLAS_INC_DIR AND OPENBLAS_LIB) if(OPENBLAS_LAPACKE_INC_DIR AND OPENBLAS_INC_DIR AND OPENBLAS_LIB)
set(CBLAS_PROVIDER OPENBLAS) file(READ "${OPENBLAS_INC_DIR}/openblas_config.h" config_file)
set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR} ${OPENBLAS_LAPACKE_INC_DIR}) string(REGEX MATCH "OpenBLAS ([0-9]+\.[0-9]+\.[0-9]+)" tmp ${config_file})
set(CBLAS_LIBRARIES ${OPENBLAS_LIB}) string(REGEX MATCH "([0-9]+\.[0-9]+\.[0-9]+)" ver ${tmp})
add_definitions(-DPADDLE_USE_OPENBLAS) if (${ver} VERSION_GREATER_EQUAL "0.3.7")
add_definitions(-DLAPACK_FOUND) set(CBLAS_PROVIDER OPENBLAS)
set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR} ${OPENBLAS_LAPACKE_INC_DIR})
message(STATUS "Found OpenBLAS (include: ${OPENBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") set(CBLAS_LIBRARIES ${OPENBLAS_LIB})
message(STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})")
add_definitions(-DPADDLE_USE_OPENBLAS)
add_definitions(-DLAPACK_FOUND)
message(STATUS "Found OpenBLAS (include: ${OPENBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
message(STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})")
endif()
endif() endif()
endif() endif()
......
# Use ccache if found ccache program # Use ccache if found ccache program
find_program(CCACHE_PATH ccache) if(NOT WIN32)
find_program(CCACHE_PATH ccache)
if(CCACHE_PATH)
execute_process(COMMAND ccache -V OUTPUT_VARIABLE ccache_output)
execute_process(COMMAND ccache -s cache directory OUTPUT_VARIABLE cache_directory)
string(REGEX MATCH "[0-9]+.[0-9]+" ccache_version ${ccache_output})
message(STATUS "ccache is founded, use ccache to speed up compile on Unix.")
# show statistics summary of ccache
message("ccache version\t\t\t " ${ccache_version} "\n" ${cache_directory})
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_PATH})
set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_PATH})
endif(CCACHE_PATH)
elseif("${CMAKE_GENERATOR}" STREQUAL "Ninja")
# (Note:zhouwei25) Only Ninja Generator can support sccache now
find_program(SCCACHE_PATH sccache)
if(CCACHE_PATH) if(SCCACHE_PATH)
execute_process(COMMAND ccache -V OUTPUT_VARIABLE ccache_output) execute_process(COMMAND sccache -V OUTPUT_VARIABLE sccache_version)
execute_process(COMMAND ccache -s cache directory OUTPUT_VARIABLE cache_directory) message(STATUS "${sccache_version} is founded, use [${SCCACHE_PATH}] to speed up compile on Windows.")
string(REGEX MATCH "[0-9]+.[0-9]+" ccache_version ${ccache_output})
message(STATUS "Ccache is founded, use ccache to speed up compile.") set(CMAKE_C_COMPILER_LAUNCHER ${SCCACHE_PATH})
# show statistics summary of ccache set(CMAKE_CXX_COMPILER_LAUNCHER ${SCCACHE_PATH})
message("ccache version\t\t\t " ${ccache_version} "\n" ${cache_directory}) # (Note:zhouwei25) sccache for cuda compiler has bug so that it can't be hit
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_PATH}) # refer to https://github.com/mozilla/sccache/issues/1017, so we fix it
set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_PATH}) set(CMAKE_CUDA_COMPILER_LAUNCHER ${SCCACHE_PATH})
endif(CCACHE_PATH) endif(SCCACHE_PATH)
endif()
...@@ -143,6 +143,14 @@ elseif(WITH_ROCM) ...@@ -143,6 +143,14 @@ elseif(WITH_ROCM)
add_definitions(-DPADDLE_WITH_HIP) add_definitions(-DPADDLE_WITH_HIP)
add_definitions(-DEIGEN_USE_GPU) add_definitions(-DEIGEN_USE_GPU)
add_definitions(-DEIGEN_USE_HIP) add_definitions(-DEIGEN_USE_HIP)
if(NOT MIOPEN_FOUND)
message(FATAL_ERROR "Paddle needs MIOpen to compile")
endif()
if(${MIOPEN_VERSION} VERSION_LESS 2090)
message(FATAL_ERROR "Paddle needs MIOPEN >= 2.9 to compile")
endif()
else() else()
add_definitions(-DHPPL_STUB_FUNC) add_definitions(-DHPPL_STUB_FUNC)
list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu) list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
......
...@@ -95,11 +95,23 @@ function(select_nvcc_arch_flags out_variable) ...@@ -95,11 +95,23 @@ function(select_nvcc_arch_flags out_variable)
if(${CUDA_ARCH_NAME} STREQUAL "Kepler") if(${CUDA_ARCH_NAME} STREQUAL "Kepler")
set(cuda_arch_bin "30 35") set(cuda_arch_bin "30 35")
elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell") elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell")
set(cuda_arch_bin "50") if (WITH_NV_JETSON)
set(cuda_arch_bin "53")
else()
set(cuda_arch_bin "50")
endif()
elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal") elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal")
set(cuda_arch_bin "60 61") if (WITH_NV_JETSON)
set(cuda_arch_bin "62")
else()
set(cuda_arch_bin "60 61")
endif()
elseif(${CUDA_ARCH_NAME} STREQUAL "Volta") elseif(${CUDA_ARCH_NAME} STREQUAL "Volta")
set(cuda_arch_bin "70") if (WITH_NV_JETSON)
set(cuda_arch_bin "72")
else()
set(cuda_arch_bin "70")
endif()
elseif(${CUDA_ARCH_NAME} STREQUAL "Turing") elseif(${CUDA_ARCH_NAME} STREQUAL "Turing")
set(cuda_arch_bin "75") set(cuda_arch_bin "75")
elseif(${CUDA_ARCH_NAME} STREQUAL "Ampere") elseif(${CUDA_ARCH_NAME} STREQUAL "Ampere")
...@@ -205,26 +217,18 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda") ...@@ -205,26 +217,18 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda")
if(WIN32) if(WIN32)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler \"/wd4244 /wd4267 /wd4819 \"") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler \"/wd4244 /wd4267 /wd4819 \"")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler /bigobj") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler /bigobj")
if(CMAKE_BUILD_TYPE STREQUAL "Debug") if(MSVC_STATIC_CRT)
# match the cl's _ITERATOR_DEBUG_LEVEL foreach(flag_var
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler \"-g -G -D_DEBUG\"") CMAKE_CUDA_FLAGS CMAKE_CUDA_FLAGS_DEBUG CMAKE_CUDA_FLAGS_RELEASE
if(MSVC_STATIC_CRT) CMAKE_CUDA_FLAGS_MINSIZEREL CMAKE_CUDA_FLAGS_RELWITHDEBINFO)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler /MTd") if(${flag_var} MATCHES "-MD")
else() string(REGEX REPLACE "-MD" "-MT" ${flag_var} "${${flag_var}}")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler /MDd") endif()
endif() endforeach(flag_var)
elseif(CMAKE_BUILD_TYPE STREQUAL "Release")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler \"-DNDEBUG\"")
if(MSVC_STATIC_CRT)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler /MT")
else()
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler /MD")
endif()
else()
message(FATAL "Windows only support Release or Debug build now. Please set visual studio build type to Release/Debug, x64 build.")
endif() endif()
endif() endif()
mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD) mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD)
mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION) mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION)
include(thrust)
...@@ -46,6 +46,7 @@ ExternalProject_Add( ...@@ -46,6 +46,7 @@ ExternalProject_Add(
${BOOST_PROJECT} ${BOOST_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
"${BOOST_DOWNLOAD_CMD}" "${BOOST_DOWNLOAD_CMD}"
URL_MD5 f891e8c2c9424f0565f0129ad9ab4aff
PREFIX ${BOOST_PREFIX_DIR} PREFIX ${BOOST_PREFIX_DIR}
DOWNLOAD_DIR ${BOOST_SOURCE_DIR} DOWNLOAD_DIR ${BOOST_SOURCE_DIR}
SOURCE_DIR ${BOOST_SOURCE_DIR} SOURCE_DIR ${BOOST_SOURCE_DIR}
......
...@@ -49,7 +49,10 @@ ExternalProject_Add( ...@@ -49,7 +49,10 @@ ExternalProject_Add(
DOWNLOAD_NO_PROGRESS 1 DOWNLOAD_NO_PROGRESS 1
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${BOX_PS_INSTALL_ROOT} CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${BOX_PS_INSTALL_ROOT}
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${BOX_PS_INSTALL_ROOT} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${BOX_PS_INSTALL_ROOT}
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
BUILD_BYPRODUCTS ${BOX_PS_LIB}
) )
ADD_LIBRARY(box_ps SHARED IMPORTED GLOBAL) ADD_LIBRARY(box_ps SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET box_ps PROPERTY IMPORTED_LOCATION ${BOX_PS_LIB}) SET_PROPERTY(TARGET box_ps PROPERTY IMPORTED_LOCATION ${BOX_PS_LIB})
......
...@@ -45,23 +45,24 @@ ExternalProject_Add( ...@@ -45,23 +45,24 @@ ExternalProject_Add(
PREFIX ${BRPC_SOURCES_DIR} PREFIX ${BRPC_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DCMAKE_INSTALL_PREFIX=${BRPC_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX=${BRPC_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR=${BRPC_INSTALL_DIR}/lib -DCMAKE_INSTALL_LIBDIR=${BRPC_INSTALL_DIR}/lib
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_PREFIX_PATH=${prefix_path} -DCMAKE_PREFIX_PATH=${prefix_path}
-DWITH_GLOG=ON -DWITH_GLOG=ON
-DIOBUF_WITH_HUGE_BLOCK=ON -DIOBUF_WITH_HUGE_BLOCK=ON
-DBRPC_WITH_RDMA=${WITH_BRPC_RDMA} -DBRPC_WITH_RDMA=${WITH_BRPC_RDMA}
${EXTERNAL_OPTIONAL_ARGS} ${EXTERNAL_OPTIONAL_ARGS}
LIST_SEPARATOR | LIST_SEPARATOR |
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${BRPC_INSTALL_DIR} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${BRPC_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR:PATH=${BRPC_INSTALL_DIR}/lib -DCMAKE_INSTALL_LIBDIR:PATH=${BRPC_INSTALL_DIR}/lib
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
BUILD_BYPRODUCTS ${BRPC_LIBRARIES}
) )
# ADD_DEPENDENCIES(extern_brpc protobuf ssl crypto leveldb gflags glog gtest snappy) # ADD_DEPENDENCIES(extern_brpc protobuf ssl crypto leveldb gflags glog gtest snappy)
ADD_DEPENDENCIES(extern_brpc protobuf ssl crypto leveldb gflags glog snappy) ADD_DEPENDENCIES(extern_brpc protobuf ssl crypto leveldb gflags glog snappy)
......
...@@ -33,6 +33,10 @@ ELSE(WIN32) ...@@ -33,6 +33,10 @@ ELSE(WIN32)
SET(CRYPTOPP_LIBRARIES "${CRYPTOPP_INSTALL_DIR}/lib/libcryptopp.a" CACHE FILEPATH "cryptopp library." FORCE) SET(CRYPTOPP_LIBRARIES "${CRYPTOPP_INSTALL_DIR}/lib/libcryptopp.a" CACHE FILEPATH "cryptopp library." FORCE)
ENDIF(WIN32) ENDIF(WIN32)
IF(APPLE AND WITH_ARM)
SET(CMAKE_CXX_FLAGS "-DCRYPTOPP_ARM_CRC32_AVAILABLE=0")
ENDIF()
set(CRYPTOPP_CMAKE_ARGS ${COMMON_CMAKE_ARGS} set(CRYPTOPP_CMAKE_ARGS ${COMMON_CMAKE_ARGS}
-DBUILD_SHARED=ON -DBUILD_SHARED=ON
-DBUILD_STATIC=ON -DBUILD_STATIC=ON
...@@ -72,6 +76,7 @@ ExternalProject_Add( ...@@ -72,6 +76,7 @@ ExternalProject_Add(
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${CRYPTOPP_INSTALL_DIR} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${CRYPTOPP_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
BUILD_BYPRODUCTS ${CRYPTOPP_LIBRARIES}
) )
ADD_LIBRARY(cryptopp STATIC IMPORTED GLOBAL) ADD_LIBRARY(cryptopp STATIC IMPORTED GLOBAL)
......
...@@ -14,27 +14,27 @@ ...@@ -14,27 +14,27 @@
include(ExternalProject) include(ExternalProject)
set(CUB_PREFIX_DIR ${THIRD_PARTY_PATH}/cub) # Note(zhouwei): extern_cub has code __FILE_, If the path of extern_cub is changed,
set(CUB_SOURCE_DIR ${THIRD_PARTY_PATH}/cub/src/extern_cub) # it will effect about 30+ cu files sccache hit and slow compile speed on windows.
set(CUB_REPOSITORY ${GIT_URL}/NVlabs/cub.git) # Therefore, a fixed CUB_PATH will be input to increase the sccache hit rate.
set(CUB_TAG 1.8.0) set(CUB_PATH "${THIRD_PARTY_PATH}/cub" CACHE STRING "A path setting for external_cub path.")
set(CUB_PREFIX_DIR ${CUB_PATH})
cache_third_party(extern_cub set(CUB_REPOSITORY ${GIT_URL}/NVlabs/cub.git)
REPOSITORY ${CUB_REPOSITORY} set(CUB_TAG 1.8.0)
TAG ${CUB_TAG}
DIR CUB_SOURCE_DIR)
SET(CUB_INCLUDE_DIR ${CUB_SOURCE_DIR}) SET(CUB_INCLUDE_DIR ${CUB_PREFIX_DIR}/src/extern_cub)
message("CUB_INCLUDE_DIR is ${CUB_INCLUDE_DIR}")
include_directories(${CUB_INCLUDE_DIR}) include_directories(${CUB_INCLUDE_DIR})
ExternalProject_Add( ExternalProject_Add(
extern_cub extern_cub
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
${SHALLOW_CLONE} ${SHALLOW_CLONE}
"${CUB_DOWNLOAD_CMD}" GIT_REPOSITORY ${CUB_REPOSITORY}
GIT_TAG ${CUB_TAG}
PREFIX ${CUB_PREFIX_DIR} PREFIX ${CUB_PREFIX_DIR}
SOURCE_DIR ${CUB_SOURCE_DIR} UPDATE_COMMAND ""
UPDATE_COMMAND ""
CONFIGURE_COMMAND "" CONFIGURE_COMMAND ""
BUILD_COMMAND "" BUILD_COMMAND ""
INSTALL_COMMAND "" INSTALL_COMMAND ""
......
...@@ -39,6 +39,7 @@ ExternalProject_Add( ...@@ -39,6 +39,7 @@ ExternalProject_Add(
&& cp ${DGC_SOURCES_DIR}/build/lib/libdgc.a ${DGC_LIBRARIES} && cp ${DGC_SOURCES_DIR}/build/lib/libdgc.a ${DGC_LIBRARIES}
&& cp ${DGC_SOURCES_DIR}/build/include/dgc.h ${DGC_INCLUDE_DIR}/dgc/ && cp ${DGC_SOURCES_DIR}/build/include/dgc.h ${DGC_INCLUDE_DIR}/dgc/
BUILD_IN_SOURCE 1 BUILD_IN_SOURCE 1
BUILD_BYPRODUCTS ${DGC_LIBRARIES}
) )
ADD_LIBRARY(dgc STATIC IMPORTED GLOBAL) ADD_LIBRARY(dgc STATIC IMPORTED GLOBAL)
......
...@@ -33,7 +33,9 @@ elseif(LINUX) ...@@ -33,7 +33,9 @@ elseif(LINUX)
# which will cause compiler error of using __host__ funciont in __host__ __device__ # which will cause compiler error of using __host__ funciont in __host__ __device__
file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/eigen/Meta.h native_src) file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/eigen/Meta.h native_src)
file(TO_NATIVE_PATH ${EIGEN_SOURCE_DIR}/Eigen/src/Core/util/Meta.h native_dst) file(TO_NATIVE_PATH ${EIGEN_SOURCE_DIR}/Eigen/src/Core/util/Meta.h native_dst)
set(EIGEN_PATCH_COMMAND cp ${native_src} ${native_dst}) file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/eigen/TensorReductionGpu.h native_src1)
file(TO_NATIVE_PATH ${EIGEN_SOURCE_DIR}/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h native_dst1)
set(EIGEN_PATCH_COMMAND cp ${native_src} ${native_dst} && cp ${native_src1} ${native_dst1})
endif() endif()
endif() endif()
......
...@@ -61,6 +61,7 @@ ExternalProject_Add( ...@@ -61,6 +61,7 @@ ExternalProject_Add(
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
BUILD_BYPRODUCTS ${GFLAGS_LIBRARIES}
) )
ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL) ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL)
......
...@@ -64,6 +64,7 @@ ExternalProject_Add( ...@@ -64,6 +64,7 @@ ExternalProject_Add(
-DCMAKE_INSTALL_LIBDIR:PATH=${GLOG_INSTALL_DIR}/lib -DCMAKE_INSTALL_LIBDIR:PATH=${GLOG_INSTALL_DIR}/lib
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
BUILD_BYPRODUCTS ${GLOG_LIBRARIES}
) )
ADD_LIBRARY(glog STATIC IMPORTED GLOBAL) ADD_LIBRARY(glog STATIC IMPORTED GLOBAL)
......
...@@ -32,7 +32,7 @@ cache_third_party(extern_gloo ...@@ -32,7 +32,7 @@ cache_third_party(extern_gloo
TAG ${GLOO_TAG} TAG ${GLOO_TAG}
DIR GLOO_SOURCE_DIR) DIR GLOO_SOURCE_DIR)
if(WITH_ASCEND OR WITH_ASCEND_CL) if(WITH_ASCEND OR WITH_ASCEND_CL)
ExternalProject_Add( ExternalProject_Add(
extern_gloo extern_gloo
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
...@@ -47,6 +47,7 @@ cache_third_party(extern_gloo ...@@ -47,6 +47,7 @@ cache_third_party(extern_gloo
&& mkdir -p ${GLOO_LIBRARY_DIR} ${GLOO_INCLUDE_DIR}/gloo && mkdir -p ${GLOO_LIBRARY_DIR} ${GLOO_INCLUDE_DIR}/gloo
INSTALL_COMMAND ${CMAKE_COMMAND} -E copy ${GLOO_SOURCE_DIR}/build/gloo/libgloo.a ${GLOO_LIBRARY_DIR} INSTALL_COMMAND ${CMAKE_COMMAND} -E copy ${GLOO_SOURCE_DIR}/build/gloo/libgloo.a ${GLOO_LIBRARY_DIR}
COMMAND ${CMAKE_COMMAND} -E copy_directory "${GLOO_SOURCE_DIR}/gloo/" "${GLOO_INCLUDE_DIR}/gloo" COMMAND ${CMAKE_COMMAND} -E copy_directory "${GLOO_SOURCE_DIR}/gloo/" "${GLOO_INCLUDE_DIR}/gloo"
BUILD_BYPRODUCTS ${GLOO_LIBRARIES}
) )
else() else()
ExternalProject_Add( ExternalProject_Add(
...@@ -63,6 +64,7 @@ else() ...@@ -63,6 +64,7 @@ else()
&& mkdir -p ${GLOO_LIBRARY_DIR} ${GLOO_INCLUDE_DIR}/gloo && mkdir -p ${GLOO_LIBRARY_DIR} ${GLOO_INCLUDE_DIR}/gloo
INSTALL_COMMAND ${CMAKE_COMMAND} -E copy ${GLOO_SOURCE_DIR}/build/gloo/libgloo.a ${GLOO_LIBRARY_DIR} INSTALL_COMMAND ${CMAKE_COMMAND} -E copy ${GLOO_SOURCE_DIR}/build/gloo/libgloo.a ${GLOO_LIBRARY_DIR}
COMMAND ${CMAKE_COMMAND} -E copy_directory "${GLOO_SOURCE_DIR}/gloo/" "${GLOO_INCLUDE_DIR}/gloo" COMMAND ${CMAKE_COMMAND} -E copy_directory "${GLOO_SOURCE_DIR}/gloo/" "${GLOO_INCLUDE_DIR}/gloo"
BUILD_BYPRODUCTS ${GLOO_LIBRARIES}
) )
endif() endif()
......
...@@ -79,6 +79,8 @@ ExternalProject_Add( ...@@ -79,6 +79,8 @@ ExternalProject_Add(
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
BUILD_BYPRODUCTS ${GTEST_LIBRARIES}
BUILD_BYPRODUCTS ${GTEST_MAIN_LIBRARIES}
) )
ADD_LIBRARY(gtest STATIC IMPORTED GLOBAL) ADD_LIBRARY(gtest STATIC IMPORTED GLOBAL)
......
...@@ -33,6 +33,7 @@ ExternalProject_Add( ...@@ -33,6 +33,7 @@ ExternalProject_Add(
&& cp ${LEVELDB_SOURCES_DIR}/src/extern_leveldb/libleveldb.a ${LEVELDB_LIBRARIES} && cp ${LEVELDB_SOURCES_DIR}/src/extern_leveldb/libleveldb.a ${LEVELDB_LIBRARIES}
&& cp -r ${LEVELDB_SOURCES_DIR}/src/extern_leveldb/include ${LEVELDB_INSTALL_DIR}/ && cp -r ${LEVELDB_SOURCES_DIR}/src/extern_leveldb/include ${LEVELDB_INSTALL_DIR}/
BUILD_IN_SOURCE 1 BUILD_IN_SOURCE 1
BUILD_BYPRODUCTS ${LEVELDB_LIBRARIES}
) )
ADD_DEPENDENCIES(extern_leveldb snappy) ADD_DEPENDENCIES(extern_leveldb snappy)
......
...@@ -49,7 +49,9 @@ ExternalProject_Add( ...@@ -49,7 +49,9 @@ ExternalProject_Add(
DOWNLOAD_NO_PROGRESS 1 DOWNLOAD_NO_PROGRESS 1
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBMCT_INSTALL_ROOT} CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBMCT_INSTALL_ROOT}
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${LIBMCT_INSTALL_ROOT} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${LIBMCT_INSTALL_ROOT}
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
) )
add_library(libmct INTERFACE) add_library(libmct INTERFACE)
......
...@@ -18,8 +18,8 @@ SET(LIBXSMM_SOURCES_DIR ${THIRD_PARTY_PATH}/libxsmm) ...@@ -18,8 +18,8 @@ SET(LIBXSMM_SOURCES_DIR ${THIRD_PARTY_PATH}/libxsmm)
SET(LIBXSMM_INSTALL_DIR ${THIRD_PARTY_PATH}/install/libxsmm) SET(LIBXSMM_INSTALL_DIR ${THIRD_PARTY_PATH}/install/libxsmm)
SET(LIBXSMM_INCLUDE_DIR "${LIBXSMM_INSTALL_DIR}/include" CACHE PATH "LIBXSMM include directory." FORCE) SET(LIBXSMM_INCLUDE_DIR "${LIBXSMM_INSTALL_DIR}/include" CACHE PATH "LIBXSMM include directory." FORCE)
SET(LIBXSMM_LIBRARY_DIR "${LIBXSMM_INSTALL_DIR}/lib" CACHE PATH "LIBXSMM library directory." FORCE) SET(LIBXSMM_LIBRARY_DIR "${LIBXSMM_INSTALL_DIR}/lib" CACHE PATH "LIBXSMM library directory." FORCE)
SET(LIBXSMM_LIBS "${LIBXSMM_LIBRARY_DIR}/libxsmm.a" SET(LIBXSMM_LIB "${LIBXSMM_LIBRARY_DIR}/libxsmm.a")
"${LIBXSMM_LIBRARY_DIR}/libxsmmnoblas.a") SET(LIBXSMMNOBLAS_LIB "${LIBXSMM_LIBRARY_DIR}/libxsmmnoblas.a")
ExternalProject_Add( ExternalProject_Add(
extern_libxsmm extern_libxsmm
...@@ -32,10 +32,12 @@ ExternalProject_Add( ...@@ -32,10 +32,12 @@ ExternalProject_Add(
BUILD_IN_SOURCE 1 BUILD_IN_SOURCE 1
BUILD_COMMAND $(MAKE) --silent PREFIX=${LIBXSMM_INSTALL_DIR} CXX=g++ CC=gcc WARP=0 install BUILD_COMMAND $(MAKE) --silent PREFIX=${LIBXSMM_INSTALL_DIR} CXX=g++ CC=gcc WARP=0 install
INSTALL_COMMAND "" INSTALL_COMMAND ""
BUILD_BYPRODUCTS ${LIBXSMM_LIB}
BUILD_BYPRODUCTS ${LIBXSMMNOBLAS_LIB}
) )
ADD_LIBRARY(libxsmm STATIC IMPORTED GLOBAL) ADD_LIBRARY(libxsmm STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET libxsmm PROPERTY IMPORTED_LOCATION "${LIBXSMM_LIBRARY_DIR}/libxsmm.a") SET_PROPERTY(TARGET libxsmm PROPERTY IMPORTED_LOCATION "${LIBXSMM_LIB}")
SET_PROPERTY(TARGET libxsmm PROPERTY IMPORTED_LOCATION "${LIBXSMM_LIBRARY_DIR}/libxsmmnoblas.a") SET_PROPERTY(TARGET libxsmm PROPERTY IMPORTED_LOCATION "${LIBXSMMNOBLAS_LIB}")
MESSAGE(STATUS "Libxsmm library: ${LIBXSMM_LIBS}") MESSAGE(STATUS "Libxsmm library: ${LIBXSMM_LIBS}")
include_directories(${LIBXSMM_INCLUDE_DIR}) include_directories(${LIBXSMM_INCLUDE_DIR})
......
...@@ -18,13 +18,21 @@ if(NOT LINUX) ...@@ -18,13 +18,21 @@ if(NOT LINUX)
return() return()
endif() endif()
if(XPU_SDK_ROOT) if (LITE_WITH_XPU)
set(LITE_WITH_XPU ON)
include_directories("${XPU_SDK_ROOT}/XTDK/include")
include_directories("${XPU_SDK_ROOT}/XTCL/include")
add_definitions(-DLITE_SUBGRAPH_WITH_XPU) add_definitions(-DLITE_SUBGRAPH_WITH_XPU)
LINK_DIRECTORIES("${XPU_SDK_ROOT}/XTDK/shlib/") IF(WITH_AARCH64)
LINK_DIRECTORIES("${XPU_SDK_ROOT}/XTDK/runtime/shlib/") SET(XPU_SDK_ENV "kylin_aarch64")
ELSEIF(WITH_SUNWAY)
SET(XPU_SDK_ENV "deepin_sw6_64")
ELSEIF(WITH_BDCENTOS)
SET(XPU_SDK_ENV "bdcentos_x86_64")
ELSEIF(WITH_UBUNTU)
SET(XPU_SDK_ENV "ubuntu_x86_64")
ELSEIF(WITH_CENTOS)
SET(XPU_SDK_ENV "centos7_x86_64")
ELSE ()
SET(XPU_SDK_ENV "ubuntu_x86_64")
ENDIF()
endif() endif()
if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR) if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
...@@ -57,7 +65,8 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR) ...@@ -57,7 +65,8 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
-DWITH_TESTING=OFF -DWITH_TESTING=OFF
-DLITE_BUILD_EXTRA=ON -DLITE_BUILD_EXTRA=ON
-DLITE_WITH_XPU=${LITE_WITH_XPU} -DLITE_WITH_XPU=${LITE_WITH_XPU}
-DXPU_SDK_ROOT=${XPU_SDK_ROOT} -DXPU_SDK_URL=${XPU_BASE_URL}
-DXPU_SDK_ENV=${XPU_SDK_ENV}
-DLITE_WITH_CODE_META_INFO=OFF -DLITE_WITH_CODE_META_INFO=OFF
-DLITE_WITH_ARM=ON) -DLITE_WITH_ARM=ON)
ExternalProject_Add( ExternalProject_Add(
...@@ -99,7 +108,8 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR) ...@@ -99,7 +108,8 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
-DLITE_WITH_STATIC_CUDA=OFF -DLITE_WITH_STATIC_CUDA=OFF
-DCUDA_ARCH_NAME=${CUDA_ARCH_NAME} -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME}
-DLITE_WITH_XPU=${LITE_WITH_XPU} -DLITE_WITH_XPU=${LITE_WITH_XPU}
-DXPU_SDK_ROOT=${XPU_SDK_ROOT} -DXPU_SDK_URL=${XPU_BASE_URL}
-DXPU_SDK_ENV=${XPU_SDK_ENV}
-DLITE_WITH_CODE_META_INFO=OFF -DLITE_WITH_CODE_META_INFO=OFF
-DLITE_WITH_ARM=OFF) -DLITE_WITH_ARM=OFF)
...@@ -147,6 +157,10 @@ message(STATUS "Paddle-lite BINARY_DIR: ${LITE_BINARY_DIR}") ...@@ -147,6 +157,10 @@ message(STATUS "Paddle-lite BINARY_DIR: ${LITE_BINARY_DIR}")
message(STATUS "Paddle-lite SOURCE_DIR: ${LITE_SOURCE_DIR}") message(STATUS "Paddle-lite SOURCE_DIR: ${LITE_SOURCE_DIR}")
include_directories(${LITE_SOURCE_DIR}) include_directories(${LITE_SOURCE_DIR})
include_directories(${LITE_BINARY_DIR}) include_directories(${LITE_BINARY_DIR})
if(LITE_WITH_XPU)
include_directories(${LITE_BINARY_DIR}/third_party/install/xpu/xdnn/include/)
include_directories(${LITE_BINARY_DIR}/third_party/install/xpu/xre/include/)
endif()
function(external_lite_libs alias path) function(external_lite_libs alias path)
add_library(${alias} SHARED IMPORTED GLOBAL) add_library(${alias} SHARED IMPORTED GLOBAL)
......
...@@ -20,7 +20,8 @@ SET(MKLDNN_SOURCE_DIR ${THIRD_PARTY_PATH}/mkldnn/src/extern_mkldnn) ...@@ -20,7 +20,8 @@ SET(MKLDNN_SOURCE_DIR ${THIRD_PARTY_PATH}/mkldnn/src/extern_mkldnn)
SET(MKLDNN_INSTALL_DIR ${THIRD_PARTY_PATH}/install/mkldnn) SET(MKLDNN_INSTALL_DIR ${THIRD_PARTY_PATH}/install/mkldnn)
SET(MKLDNN_INC_DIR "${MKLDNN_INSTALL_DIR}/include" CACHE PATH "mkldnn include directory." FORCE) SET(MKLDNN_INC_DIR "${MKLDNN_INSTALL_DIR}/include" CACHE PATH "mkldnn include directory." FORCE)
SET(MKLDNN_REPOSITORY ${GIT_URL}/oneapi-src/oneDNN.git) SET(MKLDNN_REPOSITORY ${GIT_URL}/oneapi-src/oneDNN.git)
SET(MKLDNN_TAG f58682cd8bd0615f41d879f8afc8f1511ab42d24) SET(MKLDNN_TAG 593e0de6267d2575f3e4c9e9818f0f11253d093a)
# Introduce variables: # Introduce variables:
# * CMAKE_INSTALL_LIBDIR # * CMAKE_INSTALL_LIBDIR
...@@ -42,8 +43,10 @@ IF(NOT WIN32) ...@@ -42,8 +43,10 @@ IF(NOT WIN32)
SET(MKLDNN_FLAG "${MKLDNN_FLAG} -Wno-unused-result -Wno-unused-value") SET(MKLDNN_FLAG "${MKLDNN_FLAG} -Wno-unused-result -Wno-unused-value")
SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS} ${MKLDNN_FLAG}") SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS} ${MKLDNN_FLAG}")
SET(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} ${MKLDNN_FLAG}") SET(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} ${MKLDNN_FLAG}")
SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/${LIBDIR}/libdnnl.so" CACHE FILEPATH "mkldnn library." FORCE)
ELSE() ELSE()
SET(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} /EHsc") SET(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} /EHsc")
SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/bin/mkldnn.lib" CACHE FILEPATH "mkldnn library." FORCE)
ENDIF(NOT WIN32) ENDIF(NOT WIN32)
cache_third_party(${MKLDNN_PROJECT} cache_third_party(${MKLDNN_PROJECT}
...@@ -59,8 +62,8 @@ ExternalProject_Add( ...@@ -59,8 +62,8 @@ ExternalProject_Add(
DEPENDS ${MKLDNN_DEPENDS} DEPENDS ${MKLDNN_DEPENDS}
PREFIX ${MKLDNN_PREFIX_DIR} PREFIX ${MKLDNN_PREFIX_DIR}
SOURCE_DIR ${MKLDNN_SOURCE_DIR} SOURCE_DIR ${MKLDNN_SOURCE_DIR}
BUILD_ALWAYS 1 UPDATE_COMMAND ""
# UPDATE_COMMAND "" #BUILD_ALWAYS 1
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
...@@ -76,12 +79,8 @@ ExternalProject_Add( ...@@ -76,12 +79,8 @@ ExternalProject_Add(
-DCMAKE_CXX_FLAGS=${MKLDNN_CXXFLAG} -DCMAKE_CXX_FLAGS=${MKLDNN_CXXFLAG}
-DDNNL_BUILD_TESTS=OFF -DDNNL_BUILD_EXAMPLES=OFF -DDNNL_BUILD_TESTS=OFF -DDNNL_BUILD_EXAMPLES=OFF
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLDNN_INSTALL_DIR} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLDNN_INSTALL_DIR}
BUILD_BYPRODUCTS ${MKLDNN_LIB}
) )
if(WIN32)
SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/bin/mkldnn.lib" CACHE FILEPATH "mkldnn library." FORCE)
else(WIN32)
SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/${LIBDIR}/libdnnl.so" CACHE FILEPATH "mkldnn library." FORCE)
endif(WIN32)
ADD_LIBRARY(shared_mkldnn SHARED IMPORTED GLOBAL) ADD_LIBRARY(shared_mkldnn SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET shared_mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB}) SET_PROPERTY(TARGET shared_mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB})
...@@ -100,8 +99,11 @@ ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT}) ...@@ -100,8 +99,11 @@ ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT})
# it can be directly contained in wheel or capi # it can be directly contained in wheel or capi
if(WIN32) if(WIN32)
SET(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/bin/mkldnn.dll) SET(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/bin/mkldnn.dll)
file(TO_NATIVE_PATH ${MKLDNN_INSTALL_DIR} NATIVE_MKLDNN_INSTALL_DIR)
file(TO_NATIVE_PATH ${MKLDNN_SHARED_LIB} NATIVE_MKLDNN_SHARED_LIB)
ADD_CUSTOM_COMMAND(TARGET ${MKLDNN_PROJECT} POST_BUILD ADD_CUSTOM_COMMAND(TARGET ${MKLDNN_PROJECT} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_INSTALL_DIR}/bin/dnnl.dll ${MKLDNN_SHARED_LIB}) COMMAND (copy ${NATIVE_MKLDNN_INSTALL_DIR}\\bin\\dnnl.dll ${NATIVE_MKLDNN_SHARED_LIB} /Y))
add_custom_command(TARGET ${MKLDNN_PROJECT} POST_BUILD VERBATIM add_custom_command(TARGET ${MKLDNN_PROJECT} POST_BUILD VERBATIM
COMMAND dumpbin /exports ${MKLDNN_INSTALL_DIR}/bin/mkldnn.dll > ${MKLDNN_INSTALL_DIR}/bin/exports.txt) COMMAND dumpbin /exports ${MKLDNN_INSTALL_DIR}/bin/mkldnn.dll > ${MKLDNN_INSTALL_DIR}/bin/exports.txt)
add_custom_command(TARGET ${MKLDNN_PROJECT} POST_BUILD VERBATIM add_custom_command(TARGET ${MKLDNN_PROJECT} POST_BUILD VERBATIM
...@@ -109,7 +111,7 @@ if(WIN32) ...@@ -109,7 +111,7 @@ if(WIN32)
add_custom_command(TARGET ${MKLDNN_PROJECT} POST_BUILD VERBATIM add_custom_command(TARGET ${MKLDNN_PROJECT} POST_BUILD VERBATIM
COMMAND echo EXPORTS >> ${MKLDNN_INSTALL_DIR}/bin/mkldnn.def) COMMAND echo EXPORTS >> ${MKLDNN_INSTALL_DIR}/bin/mkldnn.def)
add_custom_command(TARGET ${MKLDNN_PROJECT} POST_BUILD VERBATIM add_custom_command(TARGET ${MKLDNN_PROJECT} POST_BUILD VERBATIM
COMMAND for /f "skip=19 tokens=4" %A in (${MKLDNN_INSTALL_DIR}/bin/exports.txt) do echo %A >> ${MKLDNN_INSTALL_DIR}/bin/mkldnn.def) COMMAND echo off && (for /f "skip=19 tokens=4" %A in (${MKLDNN_INSTALL_DIR}/bin/exports.txt) do echo %A >> ${MKLDNN_INSTALL_DIR}/bin/mkldnn.def) && echo on)
add_custom_command(TARGET ${MKLDNN_PROJECT} POST_BUILD VERBATIM add_custom_command(TARGET ${MKLDNN_PROJECT} POST_BUILD VERBATIM
COMMAND lib /def:${MKLDNN_INSTALL_DIR}/bin/mkldnn.def /out:${MKLDNN_INSTALL_DIR}/bin/mkldnn.lib /machine:x64) COMMAND lib /def:${MKLDNN_INSTALL_DIR}/bin/mkldnn.def /out:${MKLDNN_INSTALL_DIR}/bin/mkldnn.lib /machine:x64)
else(WIN32) else(WIN32)
......
...@@ -24,6 +24,7 @@ SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLML_ROOT}/lib") ...@@ -24,6 +24,7 @@ SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLML_ROOT}/lib")
IF(WIN32) IF(WIN32)
SET(MKLML_VER "mklml_win_2019.0.5.20190502" CACHE STRING "" FORCE) SET(MKLML_VER "mklml_win_2019.0.5.20190502" CACHE STRING "" FORCE)
SET(MKLML_URL "https://paddlepaddledeps.bj.bcebos.com/${MKLML_VER}.zip" CACHE STRING "" FORCE) SET(MKLML_URL "https://paddlepaddledeps.bj.bcebos.com/${MKLML_VER}.zip" CACHE STRING "" FORCE)
SET(MKLML_URL_MD5 ff8c5237570f03eea37377ccfc95a08a)
SET(MKLML_LIB ${MKLML_LIB_DIR}/mklml.lib) SET(MKLML_LIB ${MKLML_LIB_DIR}/mklml.lib)
SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.lib) SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.lib)
SET(MKLML_SHARED_LIB ${MKLML_LIB_DIR}/mklml.dll) SET(MKLML_SHARED_LIB ${MKLML_LIB_DIR}/mklml.dll)
...@@ -33,6 +34,7 @@ ELSE() ...@@ -33,6 +34,7 @@ ELSE()
# Now enable csrmm function in mklml library temporarily, it will be updated as offical version later. # Now enable csrmm function in mklml library temporarily, it will be updated as offical version later.
SET(MKLML_VER "csrmm_mklml_lnx_2019.0.5" CACHE STRING "" FORCE) SET(MKLML_VER "csrmm_mklml_lnx_2019.0.5" CACHE STRING "" FORCE)
SET(MKLML_URL "http://paddlepaddledeps.bj.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE) SET(MKLML_URL "http://paddlepaddledeps.bj.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE)
SET(MKLML_URL_MD5 bc6a7faea6a2a9ad31752386f3ae87da)
SET(MKLML_LIB ${MKLML_LIB_DIR}/libmklml_intel.so) SET(MKLML_LIB ${MKLML_LIB_DIR}/libmklml_intel.so)
SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so) SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so)
SET(MKLML_SHARED_LIB ${MKLML_LIB_DIR}/libmklml_intel.so) SET(MKLML_SHARED_LIB ${MKLML_LIB_DIR}/libmklml_intel.so)
...@@ -48,10 +50,15 @@ cache_third_party(${MKLML_PROJECT} ...@@ -48,10 +50,15 @@ cache_third_party(${MKLML_PROJECT}
URL ${MKLML_URL} URL ${MKLML_URL}
DIR MKLML_SOURCE_DIR) DIR MKLML_SOURCE_DIR)
# Ninja Generator can not establish the correct dependency relationship between the imported library with target,
# the product file in the ExternalProject need to be specified manually, please refer to
# https://stackoverflow.com/questions/54866067/cmake-and-ninja-missing-and-no-known-rule-to-make-it
# It is the same to all other ExternalProject.
ExternalProject_Add( ExternalProject_Add(
${MKLML_PROJECT} ${MKLML_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
"${MKLML_DOWNLOAD_CMD}" "${MKLML_DOWNLOAD_CMD}"
URL_MD5 ${MKLML_URL_MD5}
PREFIX ${MKLML_PREFIX_DIR} PREFIX ${MKLML_PREFIX_DIR}
DOWNLOAD_DIR ${MKLML_SOURCE_DIR} DOWNLOAD_DIR ${MKLML_SOURCE_DIR}
SOURCE_DIR ${MKLML_SOURCE_DIR} SOURCE_DIR ${MKLML_SOURCE_DIR}
...@@ -60,7 +67,9 @@ ExternalProject_Add( ...@@ -60,7 +67,9 @@ ExternalProject_Add(
BUILD_COMMAND "" BUILD_COMMAND ""
UPDATE_COMMAND "" UPDATE_COMMAND ""
INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory ${MKLML_SOURCE_DIR}/include ${MKLML_INC_DIR} && INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory ${MKLML_SOURCE_DIR}/include ${MKLML_INC_DIR} &&
${CMAKE_COMMAND} -E copy_directory ${MKLML_SOURCE_DIR}/lib ${MKLML_LIB_DIR} ${CMAKE_COMMAND} -E copy_directory ${MKLML_SOURCE_DIR}/lib ${MKLML_LIB_DIR}
BUILD_BYPRODUCTS ${MKLML_LIB}
BUILD_BYPRODUCTS ${MKLML_IOMP_LIB}
) )
INCLUDE_DIRECTORIES(${MKLML_INC_DIR}) INCLUDE_DIRECTORIES(${MKLML_INC_DIR})
......
...@@ -19,6 +19,10 @@ SET(CBLAS_SOURCE_DIR ${THIRD_PARTY_PATH}/openblas/src/extern_openblas) ...@@ -19,6 +19,10 @@ SET(CBLAS_SOURCE_DIR ${THIRD_PARTY_PATH}/openblas/src/extern_openblas)
SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas) SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas)
SET(CBLAS_REPOSITORY ${GIT_URL}/xianyi/OpenBLAS.git) SET(CBLAS_REPOSITORY ${GIT_URL}/xianyi/OpenBLAS.git)
SET(CBLAS_TAG v0.3.7) SET(CBLAS_TAG v0.3.7)
if(APPLE AND WITH_ARM)
SET(CBLAS_TAG v0.3.13)
endif()
if(WITH_MIPS) if(WITH_MIPS)
SET(CBLAS_TAG v0.3.13) SET(CBLAS_TAG v0.3.13)
endif() endif()
......
...@@ -239,6 +239,10 @@ endif() ...@@ -239,6 +239,10 @@ endif()
-DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
${OPTIONAL_CACHE_ARGS} ${OPTIONAL_CACHE_ARGS}
BUILD_BYPRODUCTS ${PROTOBUF_INSTALL_DIR}/lib/libprotobuf${CMAKE_STATIC_LIBRARY_SUFFIX}
BUILD_BYPRODUCTS ${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite${CMAKE_STATIC_LIBRARY_SUFFIX}
BUILD_BYPRODUCTS ${PROTOBUF_INSTALL_DIR}/lib/libprotoc${CMAKE_STATIC_LIBRARY_SUFFIX}
BUILD_BYPRODUCTS ${PROTOBUF_INSTALL_DIR}/bin/protoc${CMAKE_EXECUTABLE_SUFFIX}
) )
ENDFUNCTION() ENDFUNCTION()
......
...@@ -53,7 +53,10 @@ ExternalProject_Add( ...@@ -53,7 +53,10 @@ ExternalProject_Add(
DOWNLOAD_NO_PROGRESS 1 DOWNLOAD_NO_PROGRESS 1
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${PSLIB_INSTALL_ROOT} CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${PSLIB_INSTALL_ROOT}
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PSLIB_INSTALL_ROOT} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PSLIB_INSTALL_ROOT}
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
BUILD_BYPRODUCTS ${PSLIB_LIB}
) )
ADD_LIBRARY(pslib SHARED IMPORTED GLOBAL) ADD_LIBRARY(pslib SHARED IMPORTED GLOBAL)
......
...@@ -52,7 +52,10 @@ ExternalProject_Add( ...@@ -52,7 +52,10 @@ ExternalProject_Add(
DOWNLOAD_NO_PROGRESS 1 DOWNLOAD_NO_PROGRESS 1
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${PSLIB_BRPC_INSTALL_ROOT} CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${PSLIB_BRPC_INSTALL_ROOT}
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PSLIB_BRPC_INSTALL_ROOT} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PSLIB_BRPC_INSTALL_ROOT}
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
BUILD_BYPRODUCTS ${PSLIB_BRPC_LIB}
) )
ADD_LIBRARY(pslib_brpc SHARED IMPORTED GLOBAL) ADD_LIBRARY(pslib_brpc SHARED IMPORTED GLOBAL)
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
INCLUDE(ExternalProject)
SET(ROCKSDB_SOURCES_DIR ${THIRD_PARTY_PATH}/rocksdb)
SET(ROCKSDB_INSTALL_DIR ${THIRD_PARTY_PATH}/install/rocksdb)
SET(ROCKSDB_INCLUDE_DIR "${ROCKSDB_INSTALL_DIR}/include" CACHE PATH "rocksdb include directory." FORCE)
SET(ROCKSDB_LIBRARIES "${ROCKSDB_INSTALL_DIR}/lib/librocksdb.a" CACHE FILEPATH "rocksdb library." FORCE)
SET(ROCKSDB_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
INCLUDE_DIRECTORIES(${ROCKSDB_INCLUDE_DIR})
ExternalProject_Add(
extern_rocksdb
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${ROCKSDB_SOURCES_DIR}
GIT_REPOSITORY "https://github.com/facebook/rocksdb"
GIT_TAG v6.10.1
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DWITH_BZ2=OFF
-DWITH_GFLAGS=OFF
-DCMAKE_CXX_FLAGS=${ROCKSDB_CMAKE_CXX_FLAGS}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
# BUILD_BYPRODUCTS ${ROCKSDB_SOURCES_DIR}/src/extern_rocksdb/librocksdb.a
INSTALL_COMMAND mkdir -p ${ROCKSDB_INSTALL_DIR}/lib/
&& cp ${ROCKSDB_SOURCES_DIR}/src/extern_rocksdb/librocksdb.a ${ROCKSDB_LIBRARIES}
&& cp -r ${ROCKSDB_SOURCES_DIR}/src/extern_rocksdb/include ${ROCKSDB_INSTALL_DIR}/
BUILD_IN_SOURCE 1
)
ADD_DEPENDENCIES(extern_rocksdb snappy)
ADD_LIBRARY(rocksdb STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET rocksdb PROPERTY IMPORTED_LOCATION ${ROCKSDB_LIBRARIES})
ADD_DEPENDENCIES(rocksdb extern_rocksdb)
LIST(APPEND external_project_dependencies rocksdb)
...@@ -22,8 +22,15 @@ set(SNAPPY_INCLUDE_DIR "${SNAPPY_INSTALL_DIR}/include" CACHE PATH "snappy includ ...@@ -22,8 +22,15 @@ set(SNAPPY_INCLUDE_DIR "${SNAPPY_INSTALL_DIR}/include" CACHE PATH "snappy includ
if(WIN32) if(WIN32)
SET(SNAPPY_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4244 /wd4267") SET(SNAPPY_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4244 /wd4267")
IF(NOT EXISTS "${SNAPPY_INSTALL_DIR}/lib/libsnappy.lib")
add_custom_command(TARGET extern_snappy POST_BUILD
COMMAND cmake -E copy ${SNAPPY_INSTALL_DIR}/lib/snappy.lib ${SNAPPY_INSTALL_DIR}/lib/libsnappy.lib
)
ENDIF()
set(SNAPPY_LIBRARIES "${SNAPPY_INSTALL_DIR}/lib/libsnappy.lib")
else() else()
SET(SNAPPY_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) SET(SNAPPY_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
set(SNAPPY_LIBRARIES "${SNAPPY_INSTALL_DIR}/lib/libsnappy.a")
endif() endif()
ExternalProject_Add( ExternalProject_Add(
...@@ -33,35 +40,26 @@ ExternalProject_Add( ...@@ -33,35 +40,26 @@ ExternalProject_Add(
PREFIX ${SNAPPY_SOURCES_DIR} PREFIX ${SNAPPY_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS=${SNAPPY_CMAKE_CXX_FLAGS} -DCMAKE_CXX_FLAGS=${SNAPPY_CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_INSTALL_PREFIX=${SNAPPY_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX=${SNAPPY_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR=${SNAPPY_INSTALL_DIR}/lib -DCMAKE_INSTALL_LIBDIR=${SNAPPY_INSTALL_DIR}/lib
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DBUILD_TESTING=OFF -DBUILD_TESTING=OFF
-DSNAPPY_BUILD_TESTS:BOOL=OFF -DSNAPPY_BUILD_TESTS:BOOL=OFF
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS} ${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${SNAPPY_INSTALL_DIR} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${SNAPPY_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR:PATH=${SNAPPY_INSTALL_DIR}/lib -DCMAKE_INSTALL_LIBDIR:PATH=${SNAPPY_INSTALL_DIR}/lib
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
BUILD_BYPRODUCTS ${SNAPPY_LIBRARIES}
) )
IF(WIN32)
IF(NOT EXISTS "${SNAPPY_INSTALL_DIR}/lib/libsnappy.lib")
add_custom_command(TARGET extern_snappy POST_BUILD
COMMAND cmake -E copy ${SNAPPY_INSTALL_DIR}/lib/snappy.lib ${SNAPPY_INSTALL_DIR}/lib/libsnappy.lib
)
ENDIF()
set(SNAPPY_LIBRARIES "${SNAPPY_INSTALL_DIR}/lib/libsnappy.lib")
else(WIN32)
set(SNAPPY_LIBRARIES "${SNAPPY_INSTALL_DIR}/lib/libsnappy.a")
endif (WIN32)
add_library(snappy STATIC IMPORTED GLOBAL) add_library(snappy STATIC IMPORTED GLOBAL)
set_property(TARGET snappy PROPERTY IMPORTED_LOCATION ${SNAPPY_LIBRARIES}) set_property(TARGET snappy PROPERTY IMPORTED_LOCATION ${SNAPPY_LIBRARIES})
......
...@@ -24,7 +24,7 @@ SET(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc) ...@@ -24,7 +24,7 @@ SET(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc)
# in case of low internet speed # in case of low internet speed
#set(WARPCTC_REPOSITORY https://gitee.com/tianjianhe/warp-ctc.git) #set(WARPCTC_REPOSITORY https://gitee.com/tianjianhe/warp-ctc.git)
set(WARPCTC_REPOSITORY ${GIT_URL}/baidu-research/warp-ctc.git) set(WARPCTC_REPOSITORY ${GIT_URL}/baidu-research/warp-ctc.git)
set(WARPCTC_TAG c690fc5755abbdbdc98ef78d51ec10a6748a8cd1) set(WARPCTC_TAG 37ece0e1bbe8a0019a63ac7e6462c36591c66a5b)
SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include" SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include"
CACHE PATH "Warp-ctc Directory" FORCE) CACHE PATH "Warp-ctc Directory" FORCE)
...@@ -32,6 +32,14 @@ SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include" ...@@ -32,6 +32,14 @@ SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include"
SET(WARPCTC_LIB_DIR "${WARPCTC_INSTALL_DIR}/lib" SET(WARPCTC_LIB_DIR "${WARPCTC_INSTALL_DIR}/lib"
CACHE PATH "Warp-ctc Library Directory" FORCE) CACHE PATH "Warp-ctc Library Directory" FORCE)
IF(WIN32)
SET(WARPCTC_LIBRARIES "${WARPCTC_INSTALL_DIR}/bin/warpctc${CMAKE_SHARED_LIBRARY_SUFFIX}"
CACHE FILEPATH "Warp-ctc Library" FORCE)
else(WIN32)
SET(WARPCTC_LIBRARIES "${WARPCTC_INSTALL_DIR}/lib/libwarpctc${CMAKE_SHARED_LIBRARY_SUFFIX}"
CACHE FILEPATH "Warp-ctc Library" FORCE)
ENDIF(WIN32)
IF(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR WIN32) IF(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR WIN32)
SET(USE_OMP OFF) SET(USE_OMP OFF)
ELSE() ELSE()
...@@ -59,7 +67,7 @@ if(WITH_ASCEND OR WITH_ASCEND_CL) ...@@ -59,7 +67,7 @@ if(WITH_ASCEND OR WITH_ASCEND_CL)
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
"-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}" -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR}
...@@ -76,8 +84,24 @@ if(WITH_ASCEND OR WITH_ASCEND_CL) ...@@ -76,8 +84,24 @@ if(WITH_ASCEND OR WITH_ASCEND_CL)
CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR}
BUILD_BYPRODUCTS ${WARPCTC_LIBRARIES}
) )
else() else()
if(WIN32)
set(WARPCTC_C_FLAGS $<FILTER:${CMAKE_C_FLAGS},EXCLUDE,/Zc:inline>)
set(WARPCTC_C_FLAGS_DEBUG $<FILTER:${CMAKE_C_FLAGS_DEBUG},EXCLUDE,/Zc:inline>)
set(WARPCTC_C_FLAGS_RELEASE $<FILTER:${CMAKE_C_FLAGS_RELEASE},EXCLUDE,/Zc:inline>)
set(WARPCTC_CXX_FLAGS $<FILTER:${CMAKE_CXX_FLAGS},EXCLUDE,/Zc:inline>)
set(WARPCTC_CXX_FLAGS_RELEASE $<FILTER:${CMAKE_CXX_FLAGS_RELEASE},EXCLUDE,/Zc:inline>)
set(WARPCTC_CXX_FLAGS_DEBUG $<FILTER:${CMAKE_CXX_FLAGS_DEBUG},EXCLUDE,/Zc:inline>)
else()
set(WARPCTC_C_FLAGS ${CMAKE_C_FLAGS})
set(WARPCTC_C_FLAGS_DEBUG ${CMAKE_C_FLAGS_DEBUG})
set(WARPCTC_C_FLAGS_RELEASE ${CMAKE_C_FLAGS_RELEASE})
set(WARPCTC_CXX_FLAGS ${CMAKE_CXX_FLAGS})
set(WARPCTC_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE})
set(WARPCTC_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG})
endif()
ExternalProject_Add( ExternalProject_Add(
extern_warpctc extern_warpctc
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
...@@ -85,17 +109,17 @@ else() ...@@ -85,17 +109,17 @@ else()
"${WARPCTC_DOWNLOAD_CMD}" "${WARPCTC_DOWNLOAD_CMD}"
PREFIX ${WARPCTC_PREFIX_DIR} PREFIX ${WARPCTC_PREFIX_DIR}
SOURCE_DIR ${WARPCTC_SOURCE_DIR} SOURCE_DIR ${WARPCTC_SOURCE_DIR}
#UPDATE_COMMAND "" UPDATE_COMMAND ""
PATCH_COMMAND "" PATCH_COMMAND ""
BUILD_ALWAYS 1 #BUILD_ALWAYS 1
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_C_FLAGS=$<FILTER:${CMAKE_C_FLAGS},EXCLUDE,/Zc:inline> -DCMAKE_C_FLAGS=${WARPCTC_C_FLAGS}
-DCMAKE_C_FLAGS_DEBUG=$<FILTER:${CMAKE_C_FLAGS_DEBUG},EXCLUDE,/Zc:inline> -DCMAKE_C_FLAGS_DEBUG=${WARPCTC_C_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_RELEASE=$<FILTER:${CMAKE_C_FLAGS_RELEASE},EXCLUDE,/Zc:inline> -DCMAKE_C_FLAGS_RELEASE=${WARPCTC_C_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS=$<FILTER:${CMAKE_CXX_FLAGS},EXCLUDE,/Zc:inline> -DCMAKE_CXX_FLAGS=${WARPCTC_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_RELEASE=$<FILTER:${CMAKE_CXX_FLAGS_RELEASE},EXCLUDE,/Zc:inline> -DCMAKE_CXX_FLAGS_RELEASE=${WARPCTC_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_DEBUG=$<FILTER:${CMAKE_CXX_FLAGS_DEBUG},EXCLUDE,/Zc:inline> -DCMAKE_CXX_FLAGS_DEBUG=${WARPCTC_CXX_FLAGS_DEBUG}
-DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR}
-DWITH_GPU=${WITH_GPU} -DWITH_GPU=${WITH_GPU}
-DWITH_ROCM=${WITH_ROCM} -DWITH_ROCM=${WITH_ROCM}
...@@ -110,18 +134,10 @@ else() ...@@ -110,18 +134,10 @@ else()
CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR}
BUILD_BYPRODUCTS ${WARPCTC_LIBRARIES}
) )
endif() endif()
IF(WIN32)
SET(WARPCTC_LIBRARIES "${WARPCTC_INSTALL_DIR}/bin/warpctc${CMAKE_SHARED_LIBRARY_SUFFIX}"
CACHE FILEPATH "Warp-ctc Library" FORCE)
else(WIN32)
SET(WARPCTC_LIBRARIES "${WARPCTC_INSTALL_DIR}/lib/libwarpctc${CMAKE_SHARED_LIBRARY_SUFFIX}"
CACHE FILEPATH "Warp-ctc Library" FORCE)
ENDIF(WIN32)
MESSAGE(STATUS "warp-ctc library: ${WARPCTC_LIBRARIES}") MESSAGE(STATUS "warp-ctc library: ${WARPCTC_LIBRARIES}")
get_filename_component(WARPCTC_LIBRARY_PATH ${WARPCTC_LIBRARIES} DIRECTORY) get_filename_component(WARPCTC_LIBRARY_PATH ${WARPCTC_LIBRARIES} DIRECTORY)
INCLUDE_DIRECTORIES(${WARPCTC_INCLUDE_DIR}) # For warpctc code to include its headers. INCLUDE_DIRECTORIES(${WARPCTC_INCLUDE_DIR}) # For warpctc code to include its headers.
......
...@@ -46,7 +46,9 @@ ExternalProject_Add( ...@@ -46,7 +46,9 @@ ExternalProject_Add(
SOURCE_DIR ${XBYAK_SOURCE_DIR} SOURCE_DIR ${XBYAK_SOURCE_DIR}
# UPDATE_COMMAND "" # UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XBYAK_INSTALL_ROOT} CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XBYAK_INSTALL_ROOT}
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${XBYAK_INSTALL_ROOT} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${XBYAK_INSTALL_ROOT}
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
) )
add_library(xbyak INTERFACE) add_library(xbyak INTERFACE)
......
...@@ -7,52 +7,74 @@ SET(XPU_PROJECT "extern_xpu") ...@@ -7,52 +7,74 @@ SET(XPU_PROJECT "extern_xpu")
SET(XPU_API_LIB_NAME "libxpuapi.so") SET(XPU_API_LIB_NAME "libxpuapi.so")
SET(XPU_RT_LIB_NAME "libxpurt.so") SET(XPU_RT_LIB_NAME "libxpurt.so")
if(NOT XPU_SDK_ROOT) IF(WITH_AARCH64)
if (WITH_AARCH64) SET(XPU_XRE_DIR_NAME "xre-kylin_aarch64")
SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/aarch64/xpu_2021_01_13.tar.gz" CACHE STRING "" FORCE) SET(XPU_XDNN_DIR_NAME "xdnn-kylin_aarch64")
elseif(WITH_SUNWAY) SET(XPU_XCCL_DIR_NAME "xccl-kylin_aarch64")
SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/sunway/xpu_2021_01_13.tar.gz" CACHE STRING "" FORCE) ELSEIF(WITH_SUNWAY)
else() SET(XPU_XRE_DIR_NAME "xre-deepin_sw6_64")
SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2021_04_09.tar.gz" CACHE STRING "" FORCE) SET(XPU_XDNN_DIR_NAME "xdnn-deepin_sw6_64")
endif() SET(XPU_XCCL_DIR_NAME "xccl-deepin_sw6_64")
ELSEIF(WITH_BDCENTOS)
SET(XPU_SOURCE_DIR "${THIRD_PARTY_PATH}/xpu") SET(XPU_XRE_DIR_NAME "xre-bdcentos_x86_64")
SET(XPU_DOWNLOAD_DIR "${XPU_SOURCE_DIR}/src/${XPU_PROJECT}") SET(XPU_XDNN_DIR_NAME "xdnn-bdcentos_x86_64")
SET(XPU_INSTALL_DIR "${THIRD_PARTY_PATH}/install/xpu") SET(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
SET(XPU_API_INC_DIR "${THIRD_PARTY_PATH}/install/xpu/include") ELSEIF(WITH_UBUNTU)
SET(XPU_LIB_DIR "${THIRD_PARTY_PATH}/install/xpu/lib") SET(XPU_XRE_DIR_NAME "xre-ubuntu_x86_64")
SET(XPU_XDNN_DIR_NAME "xdnn-ubuntu_x86_64")
SET(XPU_API_LIB "${XPU_LIB_DIR}/${XPU_API_LIB_NAME}") SET(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
SET(XPU_RT_LIB "${XPU_LIB_DIR}/${XPU_RT_LIB_NAME}") ELSEIF(WITH_CENTOS)
SET(XPU_XRE_DIR_NAME "xre-centos7_x86_64")
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${XPU_INSTALL_DIR}/lib") SET(XPU_XDNN_DIR_NAME "xdnn-centos7_x86_64")
SET(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
FILE(WRITE ${XPU_DOWNLOAD_DIR}/CMakeLists.txt
"PROJECT(XPU)\n" ELSE ()
"cmake_minimum_required(VERSION 3.0)\n" SET(XPU_XRE_DIR_NAME "xre-ubuntu_x86_64")
"install(DIRECTORY xpu/include xpu/lib \n" SET(XPU_XDNN_DIR_NAME "xdnn-ubuntu_x86_64")
" DESTINATION ${XPU_INSTALL_DIR})\n") SET(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
ENDIF()
ExternalProject_Add(
${XPU_PROJECT} SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev")
${EXTERNAL_PROJECT_LOG_ARGS} SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210701")
PREFIX ${XPU_SOURCE_DIR} SET(XPU_XRE_URL "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
DOWNLOAD_DIR ${XPU_DOWNLOAD_DIR} SET(XPU_XDNN_URL "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
DOWNLOAD_COMMAND wget --no-check-certificate ${XPU_URL} -c -q -O xpu.tar.gz SET(XPU_XCCL_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210623/${XPU_XCCL_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
&& tar xvf xpu.tar.gz SET(XPU_PACK_DEPENCE_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/pack_paddle_depence.sh" CACHE STRING "" FORCE)
DOWNLOAD_NO_PROGRESS 1
UPDATE_COMMAND "" SET(XPU_SOURCE_DIR "${THIRD_PARTY_PATH}/xpu")
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XPU_INSTALL_ROOT} SET(XPU_DOWNLOAD_DIR "${XPU_SOURCE_DIR}/src/${XPU_PROJECT}")
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${XPU_INSTALL_ROOT} SET(XPU_INSTALL_DIR "${THIRD_PARTY_PATH}/install/xpu")
) SET(XPU_INC_DIR "${THIRD_PARTY_PATH}/install/xpu/include")
else() SET(XPU_LIB_DIR "${THIRD_PARTY_PATH}/install/xpu/lib")
SET(XPU_API_INC_DIR "${XPU_SDK_ROOT}/XTDK/include/")
SET(XPU_API_LIB "${XPU_SDK_ROOT}/XTDK/shlib/libxpuapi.so") SET(XPU_API_LIB "${XPU_LIB_DIR}/${XPU_API_LIB_NAME}")
SET(XPU_RT_LIB "${XPU_SDK_ROOT}/XTDK/runtime/shlib/libxpurt.so") SET(XPU_RT_LIB "${XPU_LIB_DIR}/${XPU_RT_LIB_NAME}")
SET(XPU_LIB_DIR "${XPU_SDK_ROOT}/XTDK/shlib/")
endif() SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${XPU_INSTALL_DIR}/lib")
INCLUDE_DIRECTORIES(${XPU_API_INC_DIR}) FILE(WRITE ${XPU_DOWNLOAD_DIR}/CMakeLists.txt
"PROJECT(XPU)\n"
"cmake_minimum_required(VERSION 3.0)\n"
"install(DIRECTORY xpu/include xpu/lib \n"
" DESTINATION ${XPU_INSTALL_DIR})\n")
ExternalProject_Add(
${XPU_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${XPU_SOURCE_DIR}
DOWNLOAD_DIR ${XPU_DOWNLOAD_DIR}
DOWNLOAD_COMMAND wget ${XPU_PACK_DEPENCE_URL}
&& bash pack_paddle_depence.sh ${XPU_XRE_URL} ${XPU_XRE_DIR_NAME} ${XPU_XDNN_URL} ${XPU_XDNN_DIR_NAME} ${XPU_XCCL_URL} ${XPU_XCCL_DIR_NAME}
DOWNLOAD_NO_PROGRESS 1
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XPU_INSTALL_ROOT}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${XPU_INSTALL_ROOT}
BUILD_BYPRODUCTS ${XPU_API_LIB}
BUILD_BYPRODUCTS ${XPU_RT_LIB}
)
INCLUDE_DIRECTORIES(${XPU_INC_DIR})
ADD_LIBRARY(shared_xpuapi SHARED IMPORTED GLOBAL) ADD_LIBRARY(shared_xpuapi SHARED IMPORTED GLOBAL)
set_property(TARGET shared_xpuapi PROPERTY IMPORTED_LOCATION "${XPU_API_LIB}") set_property(TARGET shared_xpuapi PROPERTY IMPORTED_LOCATION "${XPU_API_LIB}")
...@@ -62,7 +84,7 @@ generate_dummy_static_lib(LIB_NAME "xpulib" GENERATOR "xpu.cmake") ...@@ -62,7 +84,7 @@ generate_dummy_static_lib(LIB_NAME "xpulib" GENERATOR "xpu.cmake")
TARGET_LINK_LIBRARIES(xpulib ${XPU_API_LIB} ${XPU_RT_LIB}) TARGET_LINK_LIBRARIES(xpulib ${XPU_API_LIB} ${XPU_RT_LIB})
if (WITH_XPU_BKCL) IF(WITH_XPU_BKCL)
MESSAGE(STATUS "Compile with XPU BKCL!") MESSAGE(STATUS "Compile with XPU BKCL!")
ADD_DEFINITIONS(-DPADDLE_WITH_XPU_BKCL) ADD_DEFINITIONS(-DPADDLE_WITH_XPU_BKCL)
...@@ -71,15 +93,11 @@ if (WITH_XPU_BKCL) ...@@ -71,15 +93,11 @@ if (WITH_XPU_BKCL)
SET(XPU_BKCL_INC_DIR "${THIRD_PARTY_PATH}/install/xpu/include") SET(XPU_BKCL_INC_DIR "${THIRD_PARTY_PATH}/install/xpu/include")
INCLUDE_DIRECTORIES(${XPU_BKCL_INC_DIR}) INCLUDE_DIRECTORIES(${XPU_BKCL_INC_DIR})
TARGET_LINK_LIBRARIES(xpulib ${XPU_API_LIB} ${XPU_RT_LIB} ${XPU_BKCL_LIB}) TARGET_LINK_LIBRARIES(xpulib ${XPU_API_LIB} ${XPU_RT_LIB} ${XPU_BKCL_LIB})
else(WITH_XPU_BKCL) ELSE(WITH_XPU_BKCL)
TARGET_LINK_LIBRARIES(xpulib ${XPU_API_LIB} ${XPU_RT_LIB} ) TARGET_LINK_LIBRARIES(xpulib ${XPU_API_LIB} ${XPU_RT_LIB})
endif(WITH_XPU_BKCL) ENDIF(WITH_XPU_BKCL)
if(NOT XPU_SDK_ROOT) ADD_DEPENDENCIES(xpulib ${XPU_PROJECT})
ADD_DEPENDENCIES(xpulib ${XPU_PROJECT})
else()
ADD_CUSTOM_TARGET(extern_xpu DEPENDS xpulib)
endif()
# Ensure that xpu/api.h can be included without dependency errors. # Ensure that xpu/api.h can be included without dependency errors.
file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/.xpu_headers_dummy.cc CONTENT "") file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/.xpu_headers_dummy.cc CONTENT "")
......
...@@ -21,10 +21,7 @@ set(XXHASH_INCLUDE_DIR "${XXHASH_INSTALL_DIR}/include") ...@@ -21,10 +21,7 @@ set(XXHASH_INCLUDE_DIR "${XXHASH_INSTALL_DIR}/include")
set(XXHASH_REPOSITORY ${GIT_URL}/Cyan4973/xxHash.git) set(XXHASH_REPOSITORY ${GIT_URL}/Cyan4973/xxHash.git)
set(XXHASH_TAG v0.6.5) set(XXHASH_TAG v0.6.5)
cache_third_party(extern_xxhash INCLUDE_DIRECTORIES(${XXHASH_INCLUDE_DIR})
REPOSITORY ${XXHASH_REPOSITORY}
TAG ${XXHASH_TAG}
DIR XXHASH_SOURCE_DIR)
IF(APPLE) IF(APPLE)
SET(BUILD_CMD sed -i \"\" "s/-Wstrict-prototypes -Wundef/-Wstrict-prototypes -Wundef -fPIC/g" ${XXHASH_SOURCE_DIR}/Makefile && make lib) SET(BUILD_CMD sed -i \"\" "s/-Wstrict-prototypes -Wundef/-Wstrict-prototypes -Wundef -fPIC/g" ${XXHASH_SOURCE_DIR}/Makefile && make lib)
...@@ -32,6 +29,17 @@ ELSEIF(UNIX) ...@@ -32,6 +29,17 @@ ELSEIF(UNIX)
SET(BUILD_CMD sed -i "s/-Wstrict-prototypes -Wundef/-Wstrict-prototypes -Wundef -fPIC/g" ${XXHASH_SOURCE_DIR}/Makefile && make lib) SET(BUILD_CMD sed -i "s/-Wstrict-prototypes -Wundef/-Wstrict-prototypes -Wundef -fPIC/g" ${XXHASH_SOURCE_DIR}/Makefile && make lib)
ENDIF() ENDIF()
if (WIN32)
set(XXHASH_LIBRARIES "${XXHASH_INSTALL_DIR}/lib/xxhash.lib")
else()
set(XXHASH_LIBRARIES "${XXHASH_INSTALL_DIR}/lib/libxxhash.a")
endif ()
cache_third_party(extern_xxhash
REPOSITORY ${XXHASH_REPOSITORY}
TAG ${XXHASH_TAG}
DIR XXHASH_SOURCE_DIR)
if(WIN32) if(WIN32)
ExternalProject_Add( ExternalProject_Add(
extern_xxhash extern_xxhash
...@@ -54,6 +62,7 @@ if(WIN32) ...@@ -54,6 +62,7 @@ if(WIN32)
-DBUILD_SHARED_LIBS=OFF -DBUILD_SHARED_LIBS=OFF
${OPTIONAL_CACHE_ARGS} ${OPTIONAL_CACHE_ARGS}
TEST_COMMAND "" TEST_COMMAND ""
BUILD_BYPRODUCTS ${XXHASH_LIBRARIES}
) )
else() else()
ExternalProject_Add( ExternalProject_Add(
...@@ -68,16 +77,10 @@ else() ...@@ -68,16 +77,10 @@ else()
BUILD_COMMAND ${BUILD_CMD} BUILD_COMMAND ${BUILD_CMD}
INSTALL_COMMAND make PREFIX=${XXHASH_INSTALL_DIR} install INSTALL_COMMAND make PREFIX=${XXHASH_INSTALL_DIR} install
TEST_COMMAND "" TEST_COMMAND ""
BUILD_BYPRODUCTS ${XXHASH_LIBRARIES}
) )
endif() endif()
if (WIN32)
set(XXHASH_LIBRARIES "${XXHASH_INSTALL_DIR}/lib/xxhash.lib")
else()
set(XXHASH_LIBRARIES "${XXHASH_INSTALL_DIR}/lib/libxxhash.a")
endif ()
INCLUDE_DIRECTORIES(${XXHASH_INCLUDE_DIR})
add_library(xxhash STATIC IMPORTED GLOBAL) add_library(xxhash STATIC IMPORTED GLOBAL)
set_property(TARGET xxhash PROPERTY IMPORTED_LOCATION ${XXHASH_LIBRARIES}) set_property(TARGET xxhash PROPERTY IMPORTED_LOCATION ${XXHASH_LIBRARIES})
include_directories(${XXHASH_INCLUDE_DIR}) include_directories(${XXHASH_INCLUDE_DIR})
......
...@@ -25,6 +25,12 @@ set(ZLIB_TAG v1.2.8) ...@@ -25,6 +25,12 @@ set(ZLIB_TAG v1.2.8)
INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR}) # For zlib code to include its own headers. INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR}) # For zlib code to include its own headers.
INCLUDE_DIRECTORIES(${THIRD_PARTY_PATH}/install) # For Paddle code to include zlib.h. INCLUDE_DIRECTORIES(${THIRD_PARTY_PATH}/install) # For Paddle code to include zlib.h.
IF(WIN32)
SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/zlibstatic.lib" CACHE FILEPATH "zlib library." FORCE)
ELSE(WIN32)
SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/libz.a" CACHE FILEPATH "zlib library." FORCE)
ENDIF(WIN32)
cache_third_party(extern_zlib cache_third_party(extern_zlib
REPOSITORY ${ZLIB_REPOSITORY} REPOSITORY ${ZLIB_REPOSITORY}
TAG ${ZLIB_TAG} TAG ${ZLIB_TAG}
...@@ -51,12 +57,8 @@ ExternalProject_Add( ...@@ -51,12 +57,8 @@ ExternalProject_Add(
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ZLIB_INSTALL_DIR} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ZLIB_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
BUILD_BYPRODUCTS ${ZLIB_LIBRARIES}
) )
IF(WIN32)
SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/zlibstatic.lib" CACHE FILEPATH "zlib library." FORCE)
ELSE(WIN32)
SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/libz.a" CACHE FILEPATH "zlib library." FORCE)
ENDIF(WIN32)
ADD_LIBRARY(zlib STATIC IMPORTED GLOBAL) ADD_LIBRARY(zlib STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET zlib PROPERTY IMPORTED_LOCATION ${ZLIB_LIBRARIES}) SET_PROPERTY(TARGET zlib PROPERTY IMPORTED_LOCATION ${ZLIB_LIBRARIES})
......
...@@ -28,7 +28,12 @@ function(CheckCompilerCXX14Flag) ...@@ -28,7 +28,12 @@ function(CheckCompilerCXX14Flag)
endfunction() endfunction()
CheckCompilerCXX14Flag() CheckCompilerCXX14Flag()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14") if(NOT WIN32)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
else()
set(CMAKE_CXX_STANDARD 14)
endif()
# safe_set_flag # safe_set_flag
# #
# Set a compile flag only if compiler is support # Set a compile flag only if compiler is support
...@@ -181,8 +186,11 @@ endif() ...@@ -181,8 +186,11 @@ endif()
endif(NOT WIN32) endif(NOT WIN32)
if (APPLE) if (APPLE)
# On Mac OS X build fat binaries with x86_64 architectures by default. if(WITH_ARM)
set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE) set (CMAKE_OSX_ARCHITECTURES "arm64" CACHE STRING "Build architectures for OSX" FORCE)
else(WITH_ARM)
set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE)
endif(WITH_ARM)
# On Mac OS X register class specifier is deprecated and will cause warning error on latest clang 10.0 # On Mac OS X register class specifier is deprecated and will cause warning error on latest clang 10.0
set (COMMON_FLAGS -Wno-deprecated-register) set (COMMON_FLAGS -Wno-deprecated-register)
endif(APPLE) endif(APPLE)
......
...@@ -92,7 +92,7 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR}) ...@@ -92,7 +92,7 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR})
# including io directory for inference lib paddle_api.h # including io directory for inference lib paddle_api.h
include_directories("${PADDLE_SOURCE_DIR}/paddle/fluid/framework/io") include_directories("${PADDLE_SOURCE_DIR}/paddle/fluid/framework/io")
if(NOT APPLE) if(NOT APPLE AND NOT WIN32)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
link_libraries(${CMAKE_THREAD_LIBS_INIT}) link_libraries(${CMAKE_THREAD_LIBS_INIT})
if(WITH_PSLIB OR WITH_DISTRIBUTE) if(WITH_PSLIB OR WITH_DISTRIBUTE)
...@@ -100,7 +100,7 @@ if(NOT APPLE) ...@@ -100,7 +100,7 @@ if(NOT APPLE)
else() else()
set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt") set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt")
endif() endif()
endif(NOT APPLE) endif()
set_property(GLOBAL PROPERTY FLUID_MODULES "") set_property(GLOBAL PROPERTY FLUID_MODULES "")
# find all fluid modules is used for paddle fluid static library # find all fluid modules is used for paddle fluid static library
...@@ -391,7 +391,7 @@ function(cc_binary TARGET_NAME) ...@@ -391,7 +391,7 @@ function(cc_binary TARGET_NAME)
endfunction(cc_binary) endfunction(cc_binary)
function(cc_test_build TARGET_NAME) function(cc_test_build TARGET_NAME)
if(WITH_TESTING) if(WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON")
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs SRCS DEPS) set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
...@@ -409,14 +409,12 @@ function(cc_test_build TARGET_NAME) ...@@ -409,14 +409,12 @@ function(cc_test_build TARGET_NAME)
if(WITH_ROCM) if(WITH_ROCM)
target_link_libraries(${TARGET_NAME} ${ROCM_HIPRTC_LIB}) target_link_libraries(${TARGET_NAME} ${ROCM_HIPRTC_LIB})
endif() endif()
check_coverage_opt(${TARGET_NAME} ${cc_test_SRCS})
endif() endif()
check_coverage_opt(${TARGET_NAME} ${cc_test_SRCS})
endfunction() endfunction()
function(cc_test_run TARGET_NAME) function(cc_test_run TARGET_NAME)
if(WITH_TESTING) if(WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON")
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs COMMAND ARGS) set(multiValueArgs COMMAND ARGS)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
......
...@@ -85,3 +85,5 @@ message(STATUS "HIP library name: ${hip_library_name}") ...@@ -85,3 +85,5 @@ message(STATUS "HIP library name: ${hip_library_name}")
# set HIP link libs # set HIP link libs
find_library(ROCM_HIPRTC_LIB ${hip_library_name} HINTS ${HIP_PATH}/lib) find_library(ROCM_HIPRTC_LIB ${hip_library_name} HINTS ${HIP_PATH}/lib)
message(STATUS "ROCM_HIPRTC_LIB: ${ROCM_HIPRTC_LIB}") message(STATUS "ROCM_HIPRTC_LIB: ${ROCM_HIPRTC_LIB}")
include(thrust)
...@@ -146,12 +146,19 @@ copy(inference_lib_dist ...@@ -146,12 +146,19 @@ copy(inference_lib_dist
SRCS ${THREADPOOL_INCLUDE_DIR}/ThreadPool.h SRCS ${THREADPOOL_INCLUDE_DIR}/ThreadPool.h
DSTS ${dst_dir}) DSTS ${dst_dir})
# Only GPU need cudaErrorMessage.pb # GPU must copy externalErrorMsg.pb
IF(WITH_GPU) IF(WITH_GPU)
set(dst_dir "${PADDLE_INFERENCE_INSTALL_DIR}/third_party/cudaerror/data") set(dst_dir "${PADDLE_INFERENCE_INSTALL_DIR}/third_party/externalError/data")
copy(inference_lib_dist copy(inference_lib_dist
SRCS ${cudaerror_INCLUDE_DIR} SRCS ${externalError_INCLUDE_DIR}
DSTS ${dst_dir}) DSTS ${dst_dir})
ENDIF()
IF(WITH_XPU)
set(dst_dir "${PADDLE_INFERENCE_INSTALL_DIR}/third_party/install/xpu")
copy(inference_lib_dist
SRCS ${XPU_INC_DIR} ${XPU_LIB_DIR}
DSTS ${dst_dir} ${dst_dir})
ENDIF() ENDIF()
# CMakeCache Info # CMakeCache Info
...@@ -193,10 +200,7 @@ copy(inference_lib_dist ...@@ -193,10 +200,7 @@ copy(inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/extension/include/* SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/extension/include/*
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/) DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/)
copy(inference_lib_dist copy(inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/complex64.h SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/complex.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/)
copy(inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/complex128.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/) DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/)
copy(inference_lib_dist copy(inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/float16.h SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/float16.h
...@@ -259,7 +263,7 @@ copy(fluid_lib_dist ...@@ -259,7 +263,7 @@ copy(fluid_lib_dist
set(module "platform") set(module "platform")
set(platform_lib_deps profiler_proto error_codes_proto) set(platform_lib_deps profiler_proto error_codes_proto)
if(WITH_GPU) if(WITH_GPU)
set(platform_lib_deps ${platform_lib_deps} cuda_error_proto) set(platform_lib_deps ${platform_lib_deps} external_error_proto)
endif(WITH_GPU) endif(WITH_GPU)
add_dependencies(fluid_lib_dist ${platform_lib_deps}) add_dependencies(fluid_lib_dist ${platform_lib_deps})
...@@ -323,16 +327,22 @@ function(version version_file) ...@@ -323,16 +327,22 @@ function(version version_file)
"GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n" "GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n"
"WITH_MKL: ${WITH_MKL}\n" "WITH_MKL: ${WITH_MKL}\n"
"WITH_MKLDNN: ${WITH_MKLDNN}\n" "WITH_MKLDNN: ${WITH_MKLDNN}\n"
"WITH_GPU: ${WITH_GPU}\n") "WITH_GPU: ${WITH_GPU}\n"
"WITH_ROCM: ${WITH_ROCM}\n")
if(WITH_GPU) if(WITH_GPU)
file(APPEND ${version_file} file(APPEND ${version_file}
"CUDA version: ${CUDA_VERSION}\n" "CUDA version: ${CUDA_VERSION}\n"
"CUDNN version: v${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}\n") "CUDNN version: v${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}\n")
endif() endif()
if(WITH_ROCM)
file(APPEND ${version_file}
"HIP version: ${HIP_VERSION}\n"
"MIOpen version: v${MIOPEN_MAJOR_VERSION}.${MIOPEN_MINOR_VERSION}\n")
endif()
file(APPEND ${version_file} "CXX compiler version: ${CMAKE_CXX_COMPILER_VERSION}\n") file(APPEND ${version_file} "CXX compiler version: ${CMAKE_CXX_COMPILER_VERSION}\n")
if(TENSORRT_FOUND) if(TENSORRT_FOUND)
file(APPEND ${version_file} file(APPEND ${version_file}
"WITH_TENSORRT: ${TENSORRT_FOUND}\n" "TensorRT version: v${TENSORRT_MAJOR_VERSION}\n") "WITH_TENSORRT: ${TENSORRT_FOUND}\n" "TensorRT version: v${TENSORRT_MAJOR_VERSION}.${TENSORRT_MINOR_VERSION}.${TENSORRT_PATCH_VERSION}.${TENSORRT_BUILD_VERSION}\n")
endif() endif()
if(WITH_LITE) if(WITH_LITE)
file(APPEND ${version_file} "WITH_LITE: ${WITH_LITE}\n" "LITE_GIT_TAG: ${LITE_GIT_TAG}\n") file(APPEND ${version_file} "WITH_LITE: ${WITH_LITE}\n" "LITE_GIT_TAG: ${LITE_GIT_TAG}\n")
......
...@@ -17,17 +17,34 @@ if(NOT WIN32) ...@@ -17,17 +17,34 @@ if(NOT WIN32)
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG") set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DNDEBUG") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DNDEBUG")
set(CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DNDEBUG") set(CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DNDEBUG")
if(WITH_GPU)
set(CMAKE_CUDA_FLAGS_DEBUG "-g")
set(CMAKE_CUDA_FLAGS_RELEASE "-O3 -DNDEBUG")
set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-O2 -g -DNDEBUG")
set(CMAKE_CUDA_FLAGS_MINSIZEREL "-O1 -DNDEBUG")
endif()
else() else()
set(CMAKE_C_FLAGS_DEBUG "/MDd /Zi /Ob0 /Od /RTC1")
set(CMAKE_C_FLAGS_RELEASE "/MD /O2 /Ob2 /DNDEBUG")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "/MD /Zi /O2 /Ob1 /DNDEBUG")
set(CMAKE_C_FLAGS_MINSIZEREL "/MD /O1 /Ob1 /DNDEBUG")
set(CMAKE_CXX_FLAGS_DEBUG "/MDd /Zi /Ob0 /Od /RTC1")
set(CMAKE_CXX_FLAGS_RELEASE "/MD /O2 /Ob2 /DNDEBUG")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/MD /Zi /O2 /Ob1 /DNDEBUG")
set(CMAKE_CXX_FLAGS_MINSIZEREL "/MD /O1 /Ob1 /DNDEBUG")
if(WITH_GPU)
set(CMAKE_CUDA_FLAGS_DEBUG "-Xcompiler=\"-MDd -Zi -Ob0 -Od /RTC1\"")
set(CMAKE_CUDA_FLAGS_RELEASE "-Xcompiler=\"-MD -O2 -Ob2\" -DNDEBUG")
set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-Xcompiler=\"-MD -Zi -O2 -Ob1\" -DNDEBUG")
set(CMAKE_CUDA_FLAGS_MINSIZEREL "-Xcompiler=\"-MD -O1 -Ob1\" -DNDEBUG")
endif()
# It can specify CUDA compile flag manualy, # It can specify CUDA compile flag manualy,
# its use is to remvoe /Zi to reduce GPU static library size. But it's dangerous # its use is to remvoe /Zi to reduce GPU static library size. But it's dangerous
# because CUDA will update by nvidia, then error will occur. # because CUDA will update by nvidia, then error will occur.
# Now, it's only used in VS2015 + CUDA:[10.0, 10.2] # Now, it's only used in VS2015 + CUDA:[10.0, 10.2]
set(WIN_PROPS ${CMAKE_SOURCE_DIR}/cmake/paddle_win.props) set(WIN_PROPS ${CMAKE_SOURCE_DIR}/cmake/paddle_win.props)
endif() endif()
if(WITH_GPU)
set(CMAKE_CUDA_FLAGS_DEBUG "-g")
set(CMAKE_CUDA_FLAGS_RELEASE "-O3 -DNDEBUG")
set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-O2 -g -DNDEBUG")
set(CMAKE_CUDA_FLAGS_MINSIZEREL "-O1 -DNDEBUG")
endif()
if(NOT WITH_ROCM)
return()
endif()
# Now we don't support ROCm on windows
if(WIN32)
return()
endif()
set(MIOPEN_ROOT ${ROCM_PATH}/miopen CACHE PATH "MIOPEN ROOT")
find_path(MIOPEN_INCLUDE_DIR "miopen/miopen.h"
PATHS ${MIOPEN_ROOT} ${MIOPEN_ROOT}/include ${MIOPEN_ROOT}/local/include
$ENV{MIOPEN_ROOT} $ENV{MIOPEN_ROOT}/include $ENV{MIOPEN_ROOT}/local/include
NO_DEFAULT_PATH
)
get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH)
find_library(MIOPEN_LIBRARY NAMES "libMIOpen.so"
PATHS ${MIOPEN_ROOT} ${MIOPEN_ROOT}/lib ${MIOPEN_ROOT}/lib64 ${__libpath_hist}
$ENV{MIOPEN_ROOT} $ENV{MIOPEN_ROOT}/lib $ENV{MIOPEN_ROOT}/lib64
NO_DEFAULT_PATH
DOC "Path to MIOpen library.")
if(MIOPEN_INCLUDE_DIR AND MIOPEN_LIBRARY)
set(MIOPEN_FOUND ON)
else()
set(MIOPEN_FOUND OFF)
endif()
macro(find_miopen_version miopen_header_file)
file(READ ${miopen_header_file} MIOPEN_VERSION_FILE_CONTENTS)
get_filename_component(MIOPEN_LIB_PATH ${MIOPEN_LIBRARY} DIRECTORY)
string(REGEX MATCH "define MIOPEN_VERSION_MAJOR +([0-9]+)" MIOPEN_MAJOR_VERSION
"${MIOPEN_VERSION_FILE_CONTENTS}")
string(REGEX REPLACE "define MIOPEN_VERSION_MAJOR +([0-9]+)" "\\1"
MIOPEN_MAJOR_VERSION "${MIOPEN_MAJOR_VERSION}")
string(REGEX MATCH "define MIOPEN_VERSION_MINOR +([0-9]+)" MIOPEN_MINOR_VERSION
"${MIOPEN_VERSION_FILE_CONTENTS}")
string(REGEX REPLACE "define MIOPEN_VERSION_MINOR +([0-9]+)" "\\1"
MIOPEN_MINOR_VERSION "${MIOPEN_MINOR_VERSION}")
string(REGEX MATCH "define MIOPEN_VERSION_PATCH +([0-9]+)" MIOPEN_PATCH_VERSION
"${MIOPEN_VERSION_FILE_CONTENTS}")
string(REGEX REPLACE "define MIOPEN_VERSION_PATCH +([0-9]+)" "\\1"
MIOPEN_PATCH_VERSION "${MIOPEN_PATCH_VERSION}")
string(REGEX MATCH "define MIOPEN_VERSION_TWEAK +([0-9]+)" MIOPEN_TWEAK_VERSION
"${MIOPEN_VERSION_FILE_CONTENTS}")
string(REGEX REPLACE "define MIOPEN_VERSION_TWEAK +([0-9]+)" "\\1"
MIOPEN_TWEAK_VERSION "${MIOPEN_TWEAK_VERSION}")
if(NOT MIOPEN_MAJOR_VERSION)
set(MIOPEN_VERSION "???")
else()
add_definitions("-DMIOPEN_MAJOR_VERSION=\"${MIOPEN_MAJOR_VERSION}\"")
math(EXPR MIOPEN_VERSION
"${MIOPEN_MAJOR_VERSION} * 1000 +
${MIOPEN_MINOR_VERSION} * 10 + ${MIOPEN_PATCH_VERSION}")
message(STATUS "Current MIOpen header is ${MIOPEN_INCLUDE_DIR}/miopen/miopen.h "
"Current MIOpen version is v${MIOPEN_MAJOR_VERSION}.${MIOPEN_MINOR_VERSION}.${MIOPEN_PATCH_VERSION}. ")
endif()
endmacro()
if(MIOPEN_FOUND)
find_miopen_version(${MIOPEN_INCLUDE_DIR}/miopen/version.h)
endif()
...@@ -44,6 +44,9 @@ function(op_library TARGET) ...@@ -44,6 +44,9 @@ function(op_library TARGET)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu)
list(APPEND cu_srcs ${TARGET}.cu) list(APPEND cu_srcs ${TARGET}.cu)
endif() endif()
if (WITH_NV_JETSON)
list(REMOVE_ITEM cu_srcs "decode_jpeg_op.cu")
endif()
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
set(PART_CUDA_KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu set(PART_CUDA_KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu
${PART_CUDA_KERNEL_FILES} PARENT_SCOPE) ${PART_CUDA_KERNEL_FILES} PARENT_SCOPE)
...@@ -180,8 +183,8 @@ function(op_library TARGET) ...@@ -180,8 +183,8 @@ function(op_library TARGET)
list(REMOVE_ITEM miopen_cu_cc_srcs "affine_grid_cudnn_op.cu.cc") list(REMOVE_ITEM miopen_cu_cc_srcs "affine_grid_cudnn_op.cu.cc")
list(REMOVE_ITEM miopen_cu_cc_srcs "grid_sampler_cudnn_op.cu.cc") list(REMOVE_ITEM miopen_cu_cc_srcs "grid_sampler_cudnn_op.cu.cc")
list(REMOVE_ITEM hip_srcs "cholesky_op.cu") list(REMOVE_ITEM hip_srcs "cholesky_op.cu")
list(REMOVE_ITEM hip_srcs "correlation_op.cu")
list(REMOVE_ITEM hip_srcs "multinomial_op.cu") list(REMOVE_ITEM hip_srcs "multinomial_op.cu")
list(REMOVE_ITEM hip_srcs "decode_jpeg_op.cu")
hip_library(${TARGET} SRCS ${cc_srcs} ${hip_cc_srcs} ${miopen_cu_cc_srcs} ${miopen_cu_srcs} ${mkldnn_cc_srcs} ${hip_srcs} DEPS ${op_library_DEPS} hip_library(${TARGET} SRCS ${cc_srcs} ${hip_cc_srcs} ${miopen_cu_cc_srcs} ${miopen_cu_srcs} ${mkldnn_cc_srcs} ${hip_srcs} DEPS ${op_library_DEPS}
${op_common_deps}) ${op_common_deps})
else() else()
...@@ -205,7 +208,7 @@ function(op_library TARGET) ...@@ -205,7 +208,7 @@ function(op_library TARGET)
endif() endif()
# Define operators that don't need pybind here. # Define operators that don't need pybind here.
foreach(manual_pybind_op "compare_all_op" "compare_op" "logical_op" "nccl_op" foreach(manual_pybind_op "compare_all_op" "compare_op" "logical_op" "bitwise_op" "nccl_op"
"tensor_array_read_write_op" "tensorrt_engine_op" "conv_fusion_op" "tensor_array_read_write_op" "tensorrt_engine_op" "conv_fusion_op"
"fusion_transpose_flatten_concat_op" "fusion_conv_inception_op" "fusion_transpose_flatten_concat_op" "fusion_conv_inception_op"
"sync_batch_norm_op" "dgc_op" "fused_fc_elementwise_layernorm_op" "sync_batch_norm_op" "dgc_op" "fused_fc_elementwise_layernorm_op"
......
...@@ -47,11 +47,23 @@ if(TENSORRT_FOUND) ...@@ -47,11 +47,23 @@ if(TENSORRT_FOUND)
file(READ ${TENSORRT_INCLUDE_DIR}/NvInfer.h TENSORRT_VERSION_FILE_CONTENTS) file(READ ${TENSORRT_INCLUDE_DIR}/NvInfer.h TENSORRT_VERSION_FILE_CONTENTS)
string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}") "${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" TENSORRT_PATCH_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" TENSORRT_BUILD_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
if("${TENSORRT_MAJOR_VERSION}" STREQUAL "") if("${TENSORRT_MAJOR_VERSION}" STREQUAL "")
file(READ ${TENSORRT_INCLUDE_DIR}/NvInferVersion.h TENSORRT_VERSION_FILE_CONTENTS) file(READ ${TENSORRT_INCLUDE_DIR}/NvInferVersion.h TENSORRT_VERSION_FILE_CONTENTS)
string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}") "${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" TENSORRT_PATCH_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" TENSORRT_BUILD_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
endif() endif()
if("${TENSORRT_MAJOR_VERSION}" STREQUAL "") if("${TENSORRT_MAJOR_VERSION}" STREQUAL "")
...@@ -60,9 +72,15 @@ if(TENSORRT_FOUND) ...@@ -60,9 +72,15 @@ if(TENSORRT_FOUND)
string(REGEX REPLACE "define NV_TENSORRT_MAJOR +([0-9]+)" "\\1" string(REGEX REPLACE "define NV_TENSORRT_MAJOR +([0-9]+)" "\\1"
TENSORRT_MAJOR_VERSION "${TENSORRT_MAJOR_VERSION}") TENSORRT_MAJOR_VERSION "${TENSORRT_MAJOR_VERSION}")
string(REGEX REPLACE "define NV_TENSORRT_MINOR +([0-9]+)" "\\1"
TENSORRT_MINOR_VERSION "${TENSORRT_MINOR_VERSION}")
string(REGEX REPLACE "define NV_TENSORRT_PATCH +([0-9]+)" "\\1"
TENSORRT_PATCH_VERSION "${TENSORRT_PATCH_VERSION}")
string(REGEX REPLACE "define NV_TENSORRT_BUILD +([0-9]+)" "\\1"
TENSORRT_BUILD_VERSION "${TENSORRT_BUILD_VERSION}")
message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. " message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. "
"Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ") "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}.${TENSORRT_MINOR_VERSION}.${TENSORRT_PATCH_VERSION}.${TENSORRT_BUILD_VERSION} ")
include_directories(${TENSORRT_INCLUDE_DIR}) include_directories(${TENSORRT_INCLUDE_DIR})
link_directories(${TENSORRT_LIBRARY}) link_directories(${TENSORRT_LIBRARY})
add_definitions(-DPADDLE_WITH_TENSORRT) add_definitions(-DPADDLE_WITH_TENSORRT)
......
...@@ -108,13 +108,19 @@ ENDMACRO() ...@@ -108,13 +108,19 @@ ENDMACRO()
# 2. NAME: The name of file, that determin the dirname # 2. NAME: The name of file, that determin the dirname
# #
FUNCTION(file_download_and_uncompress URL NAME) FUNCTION(file_download_and_uncompress URL NAME)
MESSAGE(STATUS "Download dependence[${NAME}] from ${URL}") set(options "")
set(oneValueArgs MD5)
set(multiValueArgs "")
cmake_parse_arguments(URL "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
MESSAGE(STATUS "Download dependence[${NAME}] from ${URL}, MD5: ${URL_MD5}")
SET(${NAME}_INCLUDE_DIR ${THIRD_PARTY_PATH}/${NAME}/data PARENT_SCOPE) SET(${NAME}_INCLUDE_DIR ${THIRD_PARTY_PATH}/${NAME}/data PARENT_SCOPE)
ExternalProject_Add( ExternalProject_Add(
extern_download_${NAME} download_${NAME}
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${THIRD_PARTY_PATH}/${NAME} PREFIX ${THIRD_PARTY_PATH}/${NAME}
URL ${URL} URL ${URL}
URL_MD5 ${URL_MD5}
TIMEOUT 120
DOWNLOAD_DIR ${THIRD_PARTY_PATH}/${NAME}/data/ DOWNLOAD_DIR ${THIRD_PARTY_PATH}/${NAME}/data/
SOURCE_DIR ${THIRD_PARTY_PATH}/${NAME}/data/ SOURCE_DIR ${THIRD_PARTY_PATH}/${NAME}/data/
DOWNLOAD_NO_PROGRESS 1 DOWNLOAD_NO_PROGRESS 1
...@@ -123,7 +129,7 @@ FUNCTION(file_download_and_uncompress URL NAME) ...@@ -123,7 +129,7 @@ FUNCTION(file_download_and_uncompress URL NAME)
UPDATE_COMMAND "" UPDATE_COMMAND ""
INSTALL_COMMAND "" INSTALL_COMMAND ""
) )
set(third_party_deps ${third_party_deps} extern_download_${NAME} PARENT_SCOPE) set(third_party_deps ${third_party_deps} download_${NAME} PARENT_SCOPE)
ENDFUNCTION() ENDFUNCTION()
...@@ -209,6 +215,8 @@ list(APPEND third_party_deps extern_eigen3 extern_gflags extern_glog extern_boos ...@@ -209,6 +215,8 @@ list(APPEND third_party_deps extern_eigen3 extern_gflags extern_glog extern_boos
list(APPEND third_party_deps extern_zlib extern_dlpack extern_warpctc extern_threadpool) list(APPEND third_party_deps extern_zlib extern_dlpack extern_warpctc extern_threadpool)
include(cblas) # find first, then download, build, install openblas include(cblas) # find first, then download, build, install openblas
message(STATUS "CBLAS_PROVIDER: ${CBLAS_PROVIDER}")
if(${CBLAS_PROVIDER} STREQUAL MKLML) if(${CBLAS_PROVIDER} STREQUAL MKLML)
list(APPEND third_party_deps extern_mklml) list(APPEND third_party_deps extern_mklml)
elseif(${CBLAS_PROVIDER} STREQUAL EXTERN_OPENBLAS) elseif(${CBLAS_PROVIDER} STREQUAL EXTERN_OPENBLAS)
...@@ -242,8 +250,22 @@ if(WITH_GPU) ...@@ -242,8 +250,22 @@ if(WITH_GPU)
include(external/cub) # download cub include(external/cub) # download cub
list(APPEND third_party_deps extern_cub) list(APPEND third_party_deps extern_cub)
endif() endif()
set(CUDAERROR_URL "http://paddlepaddledeps.bj.bcebos.com/cudaErrorMessage.tar.gz" CACHE STRING "" FORCE) set(URL "https://paddlepaddledeps.bj.bcebos.com/externalErrorMsg.tar.gz" CACHE STRING "" FORCE)
file_download_and_uncompress(${CUDAERROR_URL} "cudaerror") # download file cudaErrorMessage file_download_and_uncompress(${URL} "externalError" MD5 c0749523ebb536eb7382487d645d9cd4) # download file externalErrorMsg.tar.gz
if(WITH_TESTING)
# copy externalErrorMsg.pb, just for unittest can get error message correctly.
set(SRC_DIR ${THIRD_PARTY_PATH}/externalError/data)
if(WIN32 AND (NOT "${CMAKE_GENERATOR}" STREQUAL "Ninja"))
set(DST_DIR1 ${CMAKE_BINARY_DIR}/paddle/fluid/third_party/externalError/data)
else()
set(DST_DIR1 ${CMAKE_BINARY_DIR}/paddle/third_party/externalError/data)
endif()
set(DST_DIR2 ${CMAKE_BINARY_DIR}/python/paddle/include/third_party/externalError/data)
add_custom_command(TARGET download_externalError POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory ${SRC_DIR} ${DST_DIR1}
COMMAND ${CMAKE_COMMAND} -E copy_directory ${SRC_DIR} ${DST_DIR2}
COMMENT "copy_directory from ${SRC_DIR} to ${DST_DIR}")
endif()
endif(WITH_GPU) endif(WITH_GPU)
if(WITH_XPU) if(WITH_XPU)
...@@ -261,6 +283,14 @@ if(WITH_PSLIB) ...@@ -261,6 +283,14 @@ if(WITH_PSLIB)
if(WITH_PSLIB_BRPC) if(WITH_PSLIB_BRPC)
include(external/pslib_brpc) # download, build, install pslib_brpc include(external/pslib_brpc) # download, build, install pslib_brpc
list(APPEND third_party_deps extern_pslib_brpc) list(APPEND third_party_deps extern_pslib_brpc)
else()
include(external/snappy)
list(APPEND third_party_deps extern_snappy)
include(external/leveldb)
list(APPEND third_party_deps extern_leveldb)
include(external/brpc)
list(APPEND third_party_deps extern_brpc)
endif() endif()
endif(WITH_PSLIB) endif(WITH_PSLIB)
...@@ -296,6 +326,11 @@ if (WITH_PSCORE) ...@@ -296,6 +326,11 @@ if (WITH_PSCORE)
include(external/libmct) # download, build, install libmct include(external/libmct) # download, build, install libmct
list(APPEND third_party_deps extern_libmct) list(APPEND third_party_deps extern_libmct)
if (WITH_HETERPS)
include(external/rocksdb) # download, build, install libmct
list(APPEND third_party_deps extern_rocksdb)
endif()
endif() endif()
if(WITH_XBYAK) if(WITH_XBYAK)
......
function(add_thrust_patches_if_necessary)
set(thrust_detect_file ${PROJECT_BINARY_DIR}/detect_thrust.cu)
file(WRITE ${thrust_detect_file} ""
"#include \"thrust/version.h\"\n"
"#include \"thrust/shuffle.h\"\n"
"#include \"stdio.h\"\n"
"int main() {\n"
" int version = THRUST_VERSION;\n"
" printf(\"%d\", version);\n"
" return 0;\n"
"}\n")
execute_process(COMMAND "${CUDA_NVCC_EXECUTABLE}"
"--run" "${thrust_detect_file}"
WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
RESULT_VARIABLE nvcc_res ERROR_QUIET)
if(NOT nvcc_res EQUAL 0)
set(thrust_patches "${PADDLE_SOURCE_DIR}/patches/thrust")
message(STATUS "Add thrust patches: ${thrust_patches}")
include_directories(${thrust_patches})
endif()
endfunction()
add_thrust_patches_if_necessary()
# Paddle 预测golang API
## 安装
首先cmake编译时打开`-DON_INFER=ON`,在编译目录下得到``paddle_inference_c_install_dir``,将该目录移动到当前目录中并重命名为`paddle_c`
## 在Go中使用Paddle预测
首先创建预测配置
``` go
config := paddle.NewAnalysisConfig()
config.SetModel(model_file, params_file)
config.SwitchUseFeedFetchOps(false)
config.SwitchSpecifyInputNames(true)
```
创建predictor
``` go
predictor := paddle.NewPredictor(config)
```
获取输入Tensor和输出Tensor
``` go
inputs = predictor.GetInputTensors()
```
设置输入数据(假设只有一个输入)
``` go
input := inputs[0]
input.SetValue(data)
input.Reshape([]int32{1, 3, 300, 300})
```
运行预测
``` go
predictor.ZeroCopyRun()
```
获取输入Tensor的真实值
``` go
output := outputs[0]
predictor.GetZeroCopyOutput(output)
value := reflect.ValueOf(output.Value())
shape, dtype := paddle.ShapeAndTypeOf(value)
output_data := value.Interface().([][]float32)
```
## 示例
源码见[mobilenet](./demo/mobilenet.go)
下载[数据](https://paddle-inference-dist.cdn.bcebos.com/mobilenet-test-model-data.tar.gz)并解压到当前目录
运行
```bash
go mod init github.com/paddlepaddle
export LD_LIBRARY_PATH=`pwd`/paddle_c/paddle/lib:$LD_LIBRARY_PATH
go run ./demo/mobilenet.go
```
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import "github.com/paddlepaddle/paddle"
import "strings"
import "io/ioutil"
import "strconv"
import "reflect"
func main() {
config := paddle.NewAnalysisConfig()
config.SetModel("data/model/__model__", "data/model/__params__")
config.DisableGlogInfo()
config.SwitchUseFeedFetchOps(false)
config.SwitchSpecifyInputNames(true)
predictor := paddle.NewPredictor(config)
println("============== paddle inference ==============")
println("input num: ", predictor.GetInputNum())
println("input name: ", predictor.GetInputNames()[0])
println("output num: ", predictor.GetOutputNum())
println("output name: ", predictor.GetInputNames()[0])
println("============== run inference =================")
input := predictor.GetInputTensors()[0]
output := predictor.GetOutputTensors()[0]
filename := "data/data.txt"
data := ReadData(filename)
input.SetValue(data[:1 * 3 * 300 * 300])
input.Reshape([]int32{1, 3, 300, 300})
predictor.SetZeroCopyInput(input)
predictor.ZeroCopyRun()
predictor.GetZeroCopyOutput(output)
println("============= parse output ===================")
output_val := output.Value()
value := reflect.ValueOf(output_val)
shape, dtype := paddle.ShapeAndTypeOf(value)
switch dtype {
case paddle.PaddleDType(paddle.FLOAT32):
v := value.Interface().([][]float32)
println("v: ", v[0][0], v[0][1], "...")
case paddle.PaddleDType(paddle.UINT8):
v := value.Interface().([][]uint8)
println("v: ", v[0][0], v[0][1], "...")
case paddle.PaddleDType(paddle.INT32):
v := value.Interface().([][]int32)
println("v: ", v[0][0], v[0][1], "...")
case paddle.PaddleDType(paddle.INT64):
v := value.Interface().([][]int64)
println("v: ", v[0][0], v[0][1], "...")
}
println(shape[0], shape[1])
println(output.Shape()[0])
}
func ReadData(filename string) []float32 {
file_bytes, _ := ioutil.ReadFile(filename)
data_slice := strings.Split(string(file_bytes), " ")
var result []float32
for _, n := range data_slice {
r, _ := strconv.ParseFloat(n, 32)
result = append(result, float32(r))
}
return result
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <paddle_c_api.h>
#include <stdio.h>
#include <stdlib.h>
void SetConfig(PD_AnalysisConfig *);
void ReadData(float *data, int size);
int main(int argc, char *argv[]) {
PD_AnalysisConfig *config = PD_NewAnalysisConfig();
SetConfig(config);
PD_Predictor *predictor = PD_NewPredictor(config);
int input_num = PD_GetInputNum(predictor);
printf("Input num: %d\n", input_num);
int output_num = PD_GetOutputNum(predictor);
printf("Output num: %d\n", output_num);
PD_ZeroCopyTensor input;
PD_InitZeroCopyTensor(&input);
input.name = const_cast<char *>(PD_GetInputName(predictor, 0)); // NOLINT
input.data.capacity = sizeof(float) * 1 * 3 * 300 * 300;
input.data.length = input.data.capacity;
input.data.data = malloc(input.data.capacity);
int shape[] = {1, 3, 300, 300};
input.shape.data = static_cast<int *>(shape);
input.shape.capacity = sizeof(shape);
input.shape.length = sizeof(shape);
input.dtype = PD_FLOAT32;
ReadData((float *)input.data.data, 1 * 3 * 300 * 300); // NOLINT
float *data = (float *)input.data.data; // NOLINT
PD_SetZeroCopyInput(predictor, &input);
int *shape_ptr = (int *)input.shape.data; // NOLINT
PD_ZeroCopyRun(predictor);
PD_ZeroCopyTensor output;
PD_InitZeroCopyTensor(&output);
output.name = const_cast<char *>(PD_GetOutputName(predictor, 0)); // NOLINT
PD_GetZeroCopyOutput(predictor, &output);
PD_DestroyZeroCopyTensor(&output);
PD_DeleteAnalysisConfig(config);
PD_DeletePredictor(predictor);
return 0;
}
void SetConfig(PD_AnalysisConfig *config) {
PD_SetModel(config, "data/model/__model__", "data/model/__params__");
PD_SwitchUseFeedFetchOps(config, false);
PD_SwitchSpecifyInputNames(config, true);
PD_DisableGlogInfo(config);
// PD_SwitchIrOptim(config, false);
}
void ReadData(float *data, int n) {
FILE *fp = fopen("data/data.txt", "r");
for (int i = 0; i < n; i++) {
fscanf(fp, "%f", &data[i]);
}
fclose(fp);
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <pd_inference_api.h>
#include <stdio.h>
#include <stdlib.h>
void ReadData(float* data, int size);
int main(int argc, char* argv[]) {
PD_Config* config = PD_ConfigCreate();
PD_ConfigSetModel(config, "data/model/__model__", "data/model/__params__");
PD_ConfigDisableGlogInfo(config);
PD_Predictor* predictor = PD_PredictorCreate(config);
// config has destroyed in PD_PredictorCreate
config = NULL;
int input_num = PD_PredictorGetInputNum(predictor);
printf("Input num: %d\n", input_num);
int output_num = PD_PredictorGetOutputNum(predictor);
printf("Output num: %d\n", output_num);
PD_OneDimArrayCstr* input_names = PD_PredictorGetInputNames(predictor);
PD_Tensor* input_tensor =
PD_PredictorGetInputHandle(predictor, input_names->data[0]);
PD_OneDimArrayCstrDestroy(input_names);
input_names = NULL;
int32_t shape[] = {1, 3, 300, 300};
float* data = (float*)malloc(sizeof(float) * 1 * 3 * 300 * 300); // NOLINT
ReadData(data, 1 * 3 * 300 * 300); // NOLINT
PD_TensorReshape(input_tensor, 4, shape);
PD_TensorCopyFromCpuFloat(input_tensor, data);
free(data);
data = NULL;
PD_PredictorRun(predictor);
PD_OneDimArrayCstr* output_names = PD_PredictorGetOutputNames(predictor);
PD_Tensor* output_tensor =
PD_PredictorGetOutputHandle(predictor, output_names->data[0]);
PD_OneDimArrayCstrDestroy(output_names);
output_names = nullptr;
PD_OneDimArrayInt32* out_shape = PD_TensorGetShape(output_tensor);
int32_t size = 1;
for (size_t index = 0; index < out_shape->size; ++index) {
size = size * out_shape->data[index];
}
PD_OneDimArrayInt32Destroy(out_shape);
out_shape = NULL;
data = (float*)malloc(sizeof(float) * size); // NOLINT
PD_TensorCopyToCpuFloat(output_tensor, data);
free(data);
data = NULL;
PD_TensorDestroy(output_tensor);
output_tensor = NULL;
PD_TensorDestroy(input_tensor);
input_tensor = NULL;
PD_PredictorDestroy(predictor);
predictor = NULL;
return 0;
}
void ReadData(float* data, int n) {
FILE* fp = fopen("data/data.txt", "r");
for (int i = 0; i < n; i++) {
fscanf(fp, "%f", &data[i]);
}
fclose(fp);
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <paddle_inference_api.h>
#include <fstream>
#include <iostream>
void SetConfig(paddle::AnalysisConfig *);
int main(int argc, char *argv[]) {
paddle::AnalysisConfig config;
SetConfig(&config);
auto predictor = paddle::CreatePaddlePredictor(config);
auto input_name = predictor->GetInputNames()[0];
auto input = predictor->GetInputTensor(input_name);
std::cout << predictor->GetOutputNames()[0] << std::endl;
std::vector<int> shape{1, 3, 300, 300};
input->Reshape(std::move(shape));
std::vector<float> data(1 * 300 * 300 * 3);
std::ifstream fin("data/data.txt");
for (int i = 0; i < data.size(); i++) {
fin >> data[i];
}
input->copy_from_cpu(data.data());
predictor->ZeroCopyRun();
auto output_name = predictor->GetOutputNames()[0];
auto output = predictor->GetOutputTensor(output_name);
return 0;
}
void SetConfig(paddle::AnalysisConfig *config) {
config->SetModel("data/model/__model__", "data/model/__params__");
config->SwitchUseFeedFetchOps(false);
config->SwitchSpecifyInputNames(true);
config->SwitchIrOptim(false);
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package paddle
// #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
// #include <stdbool.h>
// #include <stdlib.h>
// #include <paddle_c_api.h>
import "C"
import "runtime"
import "unsafe"
type Precision C.Precision
const (
Precision_FLOAT32 Precision = C.kFloat32
Precision_INT8 Precision = C.kInt8
Precision_HALF Precision = C.kHalf
)
type AnalysisConfig struct {
c *C.PD_AnalysisConfig
}
func NewAnalysisConfig() *AnalysisConfig {
c_config := C.PD_NewAnalysisConfig()
config := &AnalysisConfig{c: c_config}
runtime.SetFinalizer(config, (*AnalysisConfig).finalize)
return config
}
func (config *AnalysisConfig) finalize() {
C.PD_DeleteAnalysisConfig(config.c)
}
func (config *AnalysisConfig) SetModel(model, params string) {
//C.printString((*C.char)(unsafe.Pointer(&s[0])))
c_model := C.CString(model)
defer C.free(unsafe.Pointer(c_model))
var c_params *C.char
if params == "" {
c_params = nil
} else {
c_params = C.CString(params)
defer C.free(unsafe.Pointer(c_params))
}
C.PD_SetModel(config.c, c_model, c_params)
}
func (config *AnalysisConfig) ModelDir() string {
return C.GoString(C.PD_ModelDir(config.c))
}
func (config *AnalysisConfig) ProgFile() string {
return C.GoString(C.PD_ProgFile(config.c))
}
func (config *AnalysisConfig) ParamsFile() string {
return C.GoString(C.PD_ParamsFile(config.c))
}
func (config *AnalysisConfig) EnableUseGpu(memory_pool_init_size_mb int, device_id int) {
C.PD_EnableUseGpu(config.c, C.int(memory_pool_init_size_mb), C.int(device_id))
}
func (config *AnalysisConfig) DisableGpu() {
C.PD_DisableGpu(config.c)
}
func (config *AnalysisConfig) UseGpu() bool {
return ConvertCBooleanToGo(C.PD_UseGpu(config.c))
}
func (config *AnalysisConfig) GpuDeviceId() int {
return int(C.PD_GpuDeviceId(config.c))
}
func (config *AnalysisConfig) MemoryPoolInitSizeMb() int {
return int(C.PD_MemoryPoolInitSizeMb(config.c))
}
func (config *AnalysisConfig) FractionOfGpuMemoryForPool() float32 {
return float32(C.PD_FractionOfGpuMemoryForPool(config.c))
}
func (config *AnalysisConfig) EnableCudnn() {
C.PD_EnableCUDNN(config.c)
}
func (config *AnalysisConfig) CudnnEnabled() bool {
return ConvertCBooleanToGo(C.PD_CudnnEnabled(config.c))
}
func (config *AnalysisConfig) SwitchIrOptim(x bool) {
C.PD_SwitchIrOptim(config.c, C.bool(x))
}
func (config *AnalysisConfig) IrOptim() bool {
return ConvertCBooleanToGo(C.PD_IrOptim(config.c))
}
func (config *AnalysisConfig) SwitchUseFeedFetchOps(x bool) {
C.PD_SwitchUseFeedFetchOps(config.c, C.bool(x))
}
func (config *AnalysisConfig) UseFeedFetchOpsEnabled() bool {
return ConvertCBooleanToGo(C.PD_UseFeedFetchOpsEnabled(config.c))
}
func (config *AnalysisConfig) SwitchSpecifyInputNames(x bool) {
C.PD_SwitchSpecifyInputNames(config.c, C.bool(x))
}
func (config *AnalysisConfig) SpecifyInputName() bool {
return ConvertCBooleanToGo(C.PD_SpecifyInputName(config.c))
}
func (config *AnalysisConfig) EnableTensorRtEngine(workspace_size int, max_batch_size int, min_subgraph_size int, precision Precision, use_static bool, use_calib_mode bool) {
C.PD_EnableTensorRtEngine(config.c, C.int(workspace_size), C.int(max_batch_size), C.int(min_subgraph_size), C.Precision(precision), C.bool(use_static), C.bool(use_calib_mode))
}
func (config *AnalysisConfig) TensorrtEngineEnabled() bool {
return ConvertCBooleanToGo(C.PD_TensorrtEngineEnabled(config.c))
}
func (config *AnalysisConfig) SwitchIrDebug(x bool) {
C.PD_SwitchIrDebug(config.c, C.bool(x))
}
func (config *AnalysisConfig) EnableMkldnn() {
C.PD_EnableMKLDNN(config.c)
}
func (config *AnalysisConfig) MkldnnEnabled() bool {
return ConvertCBooleanToGo(C.PD_MkldnnEnabled(config.c))
}
func (config *AnalysisConfig) SetCpuMathLibraryNumThreads(n int) {
C.PD_SetCpuMathLibraryNumThreads(config.c, C.int(n))
}
func (config *AnalysisConfig) CpuMathLibraryNumThreads() int {
return int(C.PD_CpuMathLibraryNumThreads(config.c))
}
func (config *AnalysisConfig) EnableMkldnnQuantizer() {
C.PD_EnableMkldnnQuantizer(config.c)
}
func (config *AnalysisConfig) EnableMkldnnBfloat16() {
C.PD_EnableMkldnnBfloat16(config.c)
}
func (config *AnalysisConfig) MkldnnQuantizerEnabled() bool {
return ConvertCBooleanToGo(C.PD_MkldnnQuantizerEnabled(config.c))
}
func (config *AnalysisConfig) MkldnnBfloat16Enabled() bool {
return ConvertCBooleanToGo(C.PD_MkldnnBfloat16Enabled(config.c))
}
// SetModelBuffer
// ModelFromMemory
func (config *AnalysisConfig) EnableMemoryOptim() {
C.PD_EnableMemoryOptim(config.c)
}
func (config *AnalysisConfig) MemoryOptimEnabled() bool {
return ConvertCBooleanToGo(C.PD_MemoryOptimEnabled(config.c))
}
func (config *AnalysisConfig) EnableProfile() {
C.PD_EnableProfile(config.c)
}
func (config *AnalysisConfig) ProfileEnabled() bool {
return ConvertCBooleanToGo(C.PD_ProfileEnabled(config.c))
}
func (config *AnalysisConfig) DisableGlogInfo() {
C.PD_DisableGlogInfo(config.c)
}
func (config *AnalysisConfig) DeletePass(pass string) {
c_pass := C.CString(pass)
defer C.free(unsafe.Pointer(c_pass))
C.PD_DeletePass(config.c, c_pass)
}
func (config *AnalysisConfig) SetInValid() {
C.PD_SetInValid(config.c)
}
func (config *AnalysisConfig) IsValid() bool {
return ConvertCBooleanToGo(C.PD_IsValid(config.c))
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package paddle
// #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
// #include <stdbool.h>
// #include "paddle_c_api.h"
import "C"
import "reflect"
import "runtime"
import "unsafe"
type Predictor struct {
c *C.PD_Predictor
}
func NewPredictor(config *AnalysisConfig) *Predictor {
c_predictor := C.PD_NewPredictor((*config).c)
predictor := &Predictor{c: c_predictor}
runtime.SetFinalizer(predictor, (*Predictor).finalize)
return predictor
}
func (predictor *Predictor) finalize() {
C.PD_DeletePredictor(predictor.c)
}
func DeletePredictor(predictor *Predictor) {
C.PD_DeletePredictor(predictor.c)
}
func (predictor *Predictor) GetInputNum() int {
return int(C.PD_GetInputNum(predictor.c))
}
func (predictor *Predictor) GetOutputNum() int {
return int(C.PD_GetOutputNum(predictor.c))
}
func (predictor *Predictor) GetInputName(n int) string {
return C.GoString(C.PD_GetInputName(predictor.c, C.int(n)))
}
func (predictor *Predictor) GetOutputName(n int) string {
return C.GoString(C.PD_GetOutputName(predictor.c, C.int(n)))
}
func (predictor *Predictor) GetInputTensors() [](*ZeroCopyTensor) {
var result [](*ZeroCopyTensor)
for i := 0; i < predictor.GetInputNum(); i++ {
tensor := NewZeroCopyTensor()
tensor.c.name = C.PD_GetInputName(predictor.c, C.int(i))
result = append(result, tensor)
}
return result
}
func (predictor *Predictor) GetOutputTensors() [](*ZeroCopyTensor) {
var result [](*ZeroCopyTensor)
for i := 0; i < predictor.GetOutputNum(); i++ {
tensor := NewZeroCopyTensor()
tensor.c.name = C.PD_GetOutputName(predictor.c, C.int(i))
result = append(result, tensor)
}
return result
}
func (predictor *Predictor) GetInputNames() []string {
names := make([]string, predictor.GetInputNum())
for i := 0; i < len(names); i++ {
names[i] = predictor.GetInputName(i)
}
return names
}
func (predictor *Predictor) GetOutputNames() []string {
names := make([]string, predictor.GetOutputNum())
for i := 0; i < len(names); i++ {
names[i] = predictor.GetOutputName(i)
}
return names
}
func (predictor *Predictor) SetZeroCopyInput(tensor *ZeroCopyTensor) {
C.PD_SetZeroCopyInput(predictor.c, tensor.c)
}
func (predictor *Predictor) GetZeroCopyOutput(tensor *ZeroCopyTensor) {
C.PD_GetZeroCopyOutput(predictor.c, tensor.c)
tensor.name = C.GoString(tensor.c.name)
var shape []int32
shape_hdr := (*reflect.SliceHeader)(unsafe.Pointer(&shape))
shape_hdr.Data = uintptr(unsafe.Pointer(tensor.c.shape.data))
shape_hdr.Len = int(tensor.c.shape.length / C.sizeof_int)
shape_hdr.Cap = int(tensor.c.shape.length / C.sizeof_int)
tensor.Reshape(shape)
}
func (predictor *Predictor) ZeroCopyRun() {
C.PD_ZeroCopyRun(predictor.c)
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package paddle
// #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
// #include <stdbool.h>
// #include <stdlib.h>
// #include <string.h>
// #include <paddle_c_api.h>
import "C"
import "runtime"
import "reflect"
import "unsafe"
import (
"bytes"
"encoding/binary"
)
type PaddleDType C.PD_DataType
const (
FLOAT32 PaddleDType = C.PD_FLOAT32
INT32 PaddleDType = C.PD_INT32
INT64 PaddleDType = C.PD_INT64
UINT8 PaddleDType = C.PD_UINT8
UNKDTYPE PaddleDType = C.PD_UNKDTYPE
)
var types = []struct {
gotype reflect.Type
dtype PaddleDType
}{
{reflect.TypeOf(float32(0)), FLOAT32},
{reflect.TypeOf(int32(0)), INT32},
{reflect.TypeOf(int64(0)), INT64},
{reflect.TypeOf(uint8(0)), UINT8},
}
func TypeOfShape(dtype PaddleDType, shape []int32) reflect.Type {
var ret reflect.Type
for _, t := range types {
if dtype == PaddleDType(t.dtype) {
ret = t.gotype
break
}
}
if ret == nil {
panic(bug("Data %v type is not support", dtype))
}
for range shape {
ret = reflect.SliceOf(ret)
}
return ret
}
type ZeroCopyTensor struct {
c *C.PD_ZeroCopyTensor
name string
shape []int32
}
func NewZeroCopyTensor() *ZeroCopyTensor {
c_tensor := C.PD_NewZeroCopyTensor()
tensor := &ZeroCopyTensor{c: c_tensor}
runtime.SetFinalizer(tensor, (*ZeroCopyTensor).finalize)
return tensor
}
func (tensor *ZeroCopyTensor) finalize() {
C.PD_DeleteZeroCopyTensor(tensor.c)
}
func (tensor *ZeroCopyTensor) Shape() []int32 {
return tensor.shape
}
func (tensor *ZeroCopyTensor) Name() string {
return C.GoString(tensor.c.name)
}
func (tensor *ZeroCopyTensor) Rename(name string) {
tensor.name = name
tensor.c.name = (*C.char)(unsafe.Pointer(tensor.c.name))
//tensor.c.name = C.CString(tensor.name)
//defer C.free(unsafe.Pointer(tensor.c.name))
}
func (tensor *ZeroCopyTensor) Reshape(shape []int32) {
tensor.shape = make([]int32, len(shape))
copy(tensor.shape, shape)
length := C.sizeof_int * C.size_t(len(shape))
if tensor.c.shape.capacity < C.size_t(length) {
if tensor.c.shape.capacity != C.size_t(0) {
C.free(tensor.c.shape.data)
}
tensor.c.shape.data = C.malloc(length)
tensor.c.shape.capacity = length
}
tensor.c.shape.length = length
C.memcpy(tensor.c.shape.data, unsafe.Pointer(&shape[0]), length)
}
func (tensor *ZeroCopyTensor) DataType() PaddleDType {
return PaddleDType(tensor.c.dtype)
}
func (tensor *ZeroCopyTensor) SetValue(value interface{}) {
val := reflect.ValueOf(value)
shape, dtype := ShapeAndTypeOf(val)
tensor.Reshape(shape)
num := numel(shape)
length := C.size_t(SizeofDataType(dtype) * num)
if tensor.c.data.capacity < length {
if tensor.c.data.capacity != C.size_t(0) {
C.free(tensor.c.data.data)
}
tensor.c.data.data = C.malloc(length)
tensor.c.data.capacity = length
}
tensor.c.data.length = length
switch dtype {
case PaddleDType(UINT8):
data := val.Interface().([]uint8)
C.memcpy(tensor.c.data.data, unsafe.Pointer(&data[0]), length)
case PaddleDType(INT32):
data := val.Interface().([]int32)
C.memcpy(tensor.c.data.data, unsafe.Pointer(&data[0]), length)
case PaddleDType(INT64):
data := val.Interface().([]int64)
C.memcpy(tensor.c.data.data, unsafe.Pointer(&data[0]), length)
case PaddleDType(FLOAT32):
data := val.Interface().([]float32)
C.memcpy(tensor.c.data.data, unsafe.Pointer(&data[0]), length)
}
tensor.c.dtype = C.PD_DataType(dtype)
}
func TypeOf(dtype PaddleDType, shape []int32) reflect.Type {
var ret reflect.Type
for _, t := range types {
if t.dtype == dtype {
ret = t.gotype
break
}
}
for range shape {
ret = reflect.SliceOf(ret)
}
return ret
}
func (tensor *ZeroCopyTensor) Value() interface{} {
t := TypeOf(PaddleDType(tensor.c.dtype), tensor.shape)
value := reflect.New(t)
c_bytes := tensor.c.data.data
length := tensor.c.data.length
var slice []byte
if unsafe.Sizeof(unsafe.Pointer(nil)) == 8 {
slice = (*[1<<50 - 1]byte)(unsafe.Pointer(c_bytes))[:length:length]
} else {
slice = (*[1 << 30]byte)(unsafe.Pointer(c_bytes))[:length:length]
}
r := bytes.NewReader(slice)
DecodeTensor(r, tensor.Shape(), t, value)
return reflect.Indirect(value).Interface()
}
func Endian() binary.ByteOrder {
buf := [2]byte{}
*(*uint16)(unsafe.Pointer(&buf[0])) = uint16(0xABCD)
var endian binary.ByteOrder
switch buf {
case [2]byte{0xCD, 0xAB}:
endian = binary.LittleEndian
case [2]byte{0xAB, 0xCD}:
endian = binary.BigEndian
default:
panic("Could not determine native endianness.")
}
return endian
}
func DecodeTensor(r *bytes.Reader, shape []int32, t reflect.Type, ptr reflect.Value) {
switch t.Kind() {
case reflect.Uint8, reflect.Int32, reflect.Int64, reflect.Float32:
binary.Read(r, Endian(), ptr.Interface())
case reflect.Slice:
value := reflect.Indirect(ptr)
value.Set(reflect.MakeSlice(t, int(shape[0]), int(shape[0])))
if len(shape) == 1 && value.Len() > 0 {
switch value.Index(0).Kind() {
case reflect.Uint8, reflect.Int32, reflect.Int64, reflect.Float32:
binary.Read(r, Endian(), value.Interface())
return
}
}
for i := 0; i < value.Len(); i++ {
DecodeTensor(r, shape[1:], t.Elem(), value.Index(i).Addr())
}
}
}
func SizeofDataType(dtype PaddleDType) int32 {
switch dtype {
case UINT8:
return int32(C.sizeof_uchar)
case INT32:
return int32(C.sizeof_int)
case INT64:
return int32(C.sizeof_longlong)
case FLOAT32:
return int32(C.sizeof_float)
}
return -1
}
func ShapeAndTypeOf(val reflect.Value) (shape []int32, dt PaddleDType) {
gotype := val.Type()
for gotype.Kind() == reflect.Array || gotype.Kind() == reflect.Slice {
shape = append(shape, int32(val.Len()))
if val.Len() > 0 {
val = val.Index(0)
}
gotype = gotype.Elem()
}
for _, t := range types {
if gotype.Kind() == t.gotype.Kind() {
return shape, PaddleDType(t.dtype)
}
}
return shape, dt
}
...@@ -11,8 +11,8 @@ if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) ...@@ -11,8 +11,8 @@ if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
"${DISTRIBUTE_COMPILE_FLAGS} -faligned-new") "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new")
endif() endif()
add_subdirectory(table)
add_subdirectory(service) add_subdirectory(service)
add_subdirectory(table)
add_subdirectory(test) add_subdirectory(test)
add_subdirectory(index_dataset) add_subdirectory(index_dataset)
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <sys/time.h>
#include <iostream>
#include <ostream>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include <ThreadPool.h>
#include "boost/lexical_cast.hpp"
#include "glog/logging.h"
#include "paddle/fluid/distributed/common/utils.h"
#include "paddle/fluid/framework/blocking_queue.h"
#include "paddle/fluid/framework/dim.h"
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/string/split.h"
constexpr int FG = 256 * 1024 * 1024;
constexpr int Q_SIZE = 10000;
constexpr int BUCKET = 10;
constexpr char XEOF[] = "EOF";
using boost::lexical_cast;
inline double GetCurrentUS() {
struct timeval time;
gettimeofday(&time, NULL);
return 1e+6 * time.tv_sec + time.tv_usec;
}
namespace paddle {
namespace distributed {
class ShardingMerge {
public:
ShardingMerge() {}
~ShardingMerge() {}
void Merge(const std::vector<std::string> &inputs,
const std::vector<int64_t> &feasigns, const std::string &output,
const int embedding_dim) {
pool_.reset(new ::ThreadPool(inputs.size()));
std::vector<std::future<int>> tasks(inputs.size());
std::vector<std::vector<int64_t>> rows;
rows.resize(inputs.size());
auto begin = GetCurrentUS();
for (int x = 0; x < inputs.size(); ++x) {
tasks[x] = pool_->enqueue([this, x, &rows, &inputs, &feasigns]() -> int {
DeserializeRowsFromFile(inputs[x], feasigns[x], &rows[x]);
return 0;
});
}
for (size_t x = 0; x < tasks.size(); ++x) {
tasks[x].wait();
}
int64_t total_rows = 0;
for (auto x = 0; x < rows.size(); x++) {
total_rows += rows[x].size();
}
auto end = GetCurrentUS();
VLOG(0) << "got " << total_rows
<< " feasigin ids from sparse embedding using " << end - begin;
std::vector<int64_t> total_dims = {total_rows,
static_cast<int64_t>(embedding_dim)};
std::vector<std::vector<int>> batch_buckets;
batch_buckets.resize(inputs.size());
for (int x = 0; x < rows.size(); ++x) {
batch_buckets[x] = bucket(rows[x].size(), BUCKET);
}
std::ofstream out(output, std::ios::binary);
begin = GetCurrentUS();
SerializeRowsToStream(out, rows, batch_buckets, total_rows);
end = GetCurrentUS();
VLOG(0) << "write rows to oostrream using " << end - begin;
begin = GetCurrentUS();
SerializePreTensorToStream(out, total_dims);
end = GetCurrentUS();
VLOG(0) << "write pretensor to oostrream using " << end - begin;
begin = GetCurrentUS();
SerializeValueToStream(out, inputs, batch_buckets, embedding_dim);
end = GetCurrentUS();
VLOG(0) << "write values to oostrream using " << end - begin;
}
private:
void SerializeRowsToStream(std::ostream &os,
const std::vector<std::vector<int64_t>> &rows,
const std::vector<std::vector<int>> &batch_buckets,
int64_t total_rows) {
{ // the 1st field, uint32_t version
constexpr uint32_t version = 0;
os.write(reinterpret_cast<const char *>(&version), sizeof(version));
}
{
// the 2st field, rows information
os.write(reinterpret_cast<const char *>(&total_rows), sizeof(total_rows));
for (int b = 0; b < BUCKET; ++b) {
for (int x = 0; x < batch_buckets.size(); ++x) {
auto begin = batch_buckets[x][b];
auto end = batch_buckets[x][b + 1];
if (end - begin == 0) continue;
os.write(reinterpret_cast<const char *>(rows[x].data() + begin),
sizeof(int64_t) * (end - begin));
}
}
// the 3st field, the height of SelectedRows
int64_t height = total_rows;
os.write(reinterpret_cast<const char *>(&height), sizeof(height));
}
}
void SerializePreTensorToStream(std::ostream &os,
const std::vector<int64_t> &dims) {
{ // the 1st field, uint32_t version
constexpr uint32_t version = 0;
os.write(reinterpret_cast<const char *>(&version), sizeof(version));
}
{ // the 2nd field, tensor description
// int32_t size
framework::proto::VarType::TensorDesc desc;
desc.set_data_type(framework::proto::VarType::FP32);
auto *pb_dims = desc.mutable_dims();
pb_dims->Resize(static_cast<int>(dims.size()), 0);
std::copy(dims.begin(), dims.end(), pb_dims->begin());
int32_t size = desc.ByteSize();
os.write(reinterpret_cast<const char *>(&size), sizeof(size));
auto out = desc.SerializeAsString();
os.write(out.data(), size);
}
}
void SerializeValueToVec(std::ifstream &in, const int batch,
const int embedding_dim, std::vector<float> *out) {
auto queue =
std::make_shared<framework::BlockingQueue<std::vector<std::string>>>();
auto read = [batch, &in, &queue]() {
std::string line;
std::vector<std::string> columns;
std::vector<std::string> values_str;
int count = 0;
while (std::getline(in, line)) {
++count;
columns = string::Split(line, '\t');
if (columns.size() != 5) {
VLOG(0) << "unexpected line: " << line << ", skip it";
continue;
}
values_str = string::Split(columns[4], ',');
queue->Push(values_str);
if (count >= batch) {
break;
}
}
queue->Push({});
};
auto write = [embedding_dim, &out, &queue]() {
std::vector<std::string> values_str;
std::string line;
while (true) {
queue->Pop(&values_str);
if (values_str.size() == 0) {
break;
}
for (int x = 0; x < embedding_dim; ++x) {
float v = 0.0;
try {
v = lexical_cast<float>(values_str[x]);
} catch (boost::bad_lexical_cast &e) {
VLOG(0) << " get unexpected line: " << line;
}
out->push_back(v);
}
}
};
std::thread p_read(read);
std::thread p_write(write);
p_read.join();
p_write.join();
}
void SerializeVecToStream(std::ostream &out,
const std::vector<float> &value) {
out.write(reinterpret_cast<const char *>(value.data()),
static_cast<std::streamsize>(sizeof(float) * value.size()));
}
void SerializeValueToStream(
std::ostream &out, const std::vector<std::string> &ins,
const std::vector<std::vector<int>> &batch_buckets,
const int embedding_dim) {
std::vector<std::shared_ptr<std::ifstream>> in_streams;
for (int x = 0; x < ins.size(); ++x) {
in_streams.emplace_back(std::make_shared<std::ifstream>(ins[x]));
}
std::vector<std::future<int>> tasks(ins.size());
for (int b = 0; b < BUCKET; ++b) {
std::vector<std::vector<float>> values;
values.resize(tasks.size());
auto begin = GetCurrentUS();
for (int x = 0; x < tasks.size(); ++x) {
auto batch = batch_buckets[x][b + 1] - batch_buckets[x][b];
values[x].clear();
values[x].reserve(batch * embedding_dim);
}
for (int x = 0; x < tasks.size(); ++x) {
tasks[x] =
pool_->enqueue([this, b, x, &out, &in_streams, &batch_buckets,
&values, embedding_dim]() -> int {
auto batch = batch_buckets[x][b + 1] - batch_buckets[x][b];
if (batch == 0) return 0;
SerializeValueToVec(*(in_streams[x].get()), batch, embedding_dim,
&values[x]);
return 0;
});
}
for (size_t x = 0; x < tasks.size(); ++x) {
tasks[x].wait();
}
auto end = GetCurrentUS();
auto begin1 = GetCurrentUS();
for (size_t x = 0; x < tasks.size(); ++x) {
SerializeVecToStream(out, values[x]);
}
auto end1 = GetCurrentUS();
VLOG(0) << "serialize buckets " << b << " read using " << end - begin
<< ", to oostream using " << end1 - begin1;
}
}
void DeserializeRowsFromFile(const std::string &input_file,
const int64_t feasigns,
std::vector<int64_t> *rows) {
std::string line;
std::vector<std::string> columns;
std::ifstream file(input_file);
rows->reserve(feasigns);
while (std::getline(file, line)) {
columns = string::Split(line, '\t');
if (columns.size() != 5) {
VLOG(0) << "unexpected line: " << line << ", skip it";
continue;
}
rows->push_back(std::stoull(columns[0]));
}
VLOG(0) << "parse " << rows->size() << " embedding rows from "
<< input_file;
}
private:
std::unique_ptr<::ThreadPool> pool_;
};
} // namespace distributed
} // namespace paddle
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
#pragma once #pragma once
#include <sys/time.h>
#include <functional> #include <functional>
#include <memory> #include <memory>
#include <string> #include <string>
...@@ -83,5 +85,11 @@ std::string to_string(const std::vector<T>& vec) { ...@@ -83,5 +85,11 @@ std::string to_string(const std::vector<T>& vec) {
} }
return ss.str(); return ss.str();
} }
inline double GetCurrentUS() {
struct timeval time;
gettimeofday(&time, NULL);
return 1e+6 * time.tv_sec + time.tv_usec;
} }
} } // namespace distributed
} // namespace paddle
...@@ -417,8 +417,10 @@ void FleetWrapper::PushSparseFromTensorWithLabelAsync( ...@@ -417,8 +417,10 @@ void FleetWrapper::PushSparseFromTensorWithLabelAsync(
return; return;
} }
void FleetWrapper::LoadModel(const std::string& path, const int mode) { void FleetWrapper::LoadModel(const std::string& path, const std::string& mode) {
auto ret = pserver_ptr_->_worker_ptr->load(path, std::to_string(mode)); auto* communicator = Communicator::GetInstance();
auto ret = communicator->_worker_ptr->load(path, mode);
// auto ret = pserver_ptr_->_worker_ptr->load(path, std::to_string(mode));
ret.wait(); ret.wait();
if (ret.get() != 0) { if (ret.get() != 0) {
LOG(ERROR) << "load model from path:" << path << " failed"; LOG(ERROR) << "load model from path:" << path << " failed";
...@@ -429,8 +431,11 @@ void FleetWrapper::LoadModel(const std::string& path, const int mode) { ...@@ -429,8 +431,11 @@ void FleetWrapper::LoadModel(const std::string& path, const int mode) {
void FleetWrapper::LoadModelOneTable(const uint64_t table_id, void FleetWrapper::LoadModelOneTable(const uint64_t table_id,
const std::string& path, const int mode) { const std::string& path, const int mode) {
auto* communicator = Communicator::GetInstance();
auto ret = auto ret =
pserver_ptr_->_worker_ptr->load(table_id, path, std::to_string(mode)); communicator->_worker_ptr->load(table_id, path, std::to_string(mode));
// auto ret =
// pserver_ptr_->_worker_ptr->load(table_id, path, std::to_string(mode));
ret.wait(); ret.wait();
if (ret.get() != 0) { if (ret.get() != 0) {
LOG(ERROR) << "load model of table id: " << table_id LOG(ERROR) << "load model of table id: " << table_id
......
...@@ -200,7 +200,7 @@ class FleetWrapper { ...@@ -200,7 +200,7 @@ class FleetWrapper {
void PrintTableStat(const uint64_t table_id); void PrintTableStat(const uint64_t table_id);
// mode = 0, load all feature // mode = 0, load all feature
// mode = 1, load delta feature, which means load diff // mode = 1, load delta feature, which means load diff
void LoadModel(const std::string& path, const int mode); void LoadModel(const std::string& path, const std::string& mode);
// mode = 0, load all feature // mode = 0, load all feature
// mode = 1, load delta feature, which means load diff // mode = 1, load delta feature, which means load diff
void LoadModelOneTable(const uint64_t table_id, const std::string& path, void LoadModelOneTable(const uint64_t table_id, const std::string& path,
......
...@@ -13,13 +13,10 @@ ...@@ -13,13 +13,10 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/distributed/index_dataset/index_sampler.h" #include "paddle/fluid/distributed/index_dataset/index_sampler.h"
#include "paddle/fluid/operators/math/sampler.h"
namespace paddle { namespace paddle {
namespace distributed { namespace distributed {
using Sampler = paddle::operators::math::Sampler;
std::vector<std::vector<uint64_t>> LayerWiseSampler::sample( std::vector<std::vector<uint64_t>> LayerWiseSampler::sample(
const std::vector<std::vector<uint64_t>>& user_inputs, const std::vector<std::vector<uint64_t>>& user_inputs,
const std::vector<uint64_t>& target_ids, bool with_hierarchy) { const std::vector<uint64_t>& target_ids, bool with_hierarchy) {
...@@ -30,22 +27,7 @@ std::vector<std::vector<uint64_t>> LayerWiseSampler::sample( ...@@ -30,22 +27,7 @@ std::vector<std::vector<uint64_t>> LayerWiseSampler::sample(
std::vector<uint64_t>(user_feature_num + 2)); std::vector<uint64_t>(user_feature_num + 2));
auto max_layer = tree_->Height(); auto max_layer = tree_->Height();
std::vector<Sampler*> sampler_vec(max_layer - start_sample_layer_);
std::vector<std::vector<IndexNode>> layer_ids(max_layer -
start_sample_layer_);
auto layer_index = max_layer - 1;
size_t idx = 0; size_t idx = 0;
while (layer_index >= start_sample_layer_) {
auto layer_codes = tree_->GetLayerCodes(layer_index);
layer_ids[idx] = tree_->GetNodes(layer_codes);
sampler_vec[idx] = new paddle::operators::math::UniformSampler(
layer_ids[idx].size() - 1, seed_);
layer_index--;
idx++;
}
idx = 0;
for (size_t i = 0; i < input_num; i++) { for (size_t i = 0; i < input_num; i++) {
auto travel_codes = auto travel_codes =
tree_->GetTravelCodes(target_ids[i], start_sample_layer_); tree_->GetTravelCodes(target_ids[i], start_sample_layer_);
...@@ -76,18 +58,15 @@ std::vector<std::vector<uint64_t>> LayerWiseSampler::sample( ...@@ -76,18 +58,15 @@ std::vector<std::vector<uint64_t>> LayerWiseSampler::sample(
for (int idx_offset = 0; idx_offset < layer_counts_[j]; idx_offset++) { for (int idx_offset = 0; idx_offset < layer_counts_[j]; idx_offset++) {
int sample_res = 0; int sample_res = 0;
do { do {
sample_res = sampler_vec[j]->Sample(); sample_res = sampler_vec_[j]->Sample();
} while (layer_ids[j][sample_res].id() == travel_path[j].id()); } while (layer_ids_[j][sample_res].id() == travel_path[j].id());
outputs[idx + idx_offset][user_feature_num] = outputs[idx + idx_offset][user_feature_num] =
layer_ids[j][sample_res].id(); layer_ids_[j][sample_res].id();
outputs[idx + idx_offset][user_feature_num + 1] = 0; outputs[idx + idx_offset][user_feature_num + 1] = 0;
} }
idx += layer_counts_[j]; idx += layer_counts_[j];
} }
} }
for (size_t i = 0; i < sampler_vec.size(); i++) {
delete sampler_vec[i];
}
return outputs; return outputs;
} }
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <vector> #include <vector>
#include "paddle/fluid/distributed/index_dataset/index_wrapper.h" #include "paddle/fluid/distributed/index_dataset/index_wrapper.h"
#include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/math/sampler.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
namespace paddle { namespace paddle {
...@@ -83,6 +84,23 @@ class LayerWiseSampler : public IndexSampler { ...@@ -83,6 +84,23 @@ class LayerWiseSampler : public IndexSampler {
} }
reverse(layer_counts_.begin(), layer_counts_.end()); reverse(layer_counts_.begin(), layer_counts_.end());
VLOG(3) << "sample counts sum: " << layer_counts_sum_; VLOG(3) << "sample counts sum: " << layer_counts_sum_;
auto max_layer = tree_->Height();
sampler_vec_.clear();
layer_ids_.clear();
auto layer_index = max_layer - 1;
size_t idx = 0;
while (layer_index >= start_sample_layer_) {
auto layer_codes = tree_->GetLayerCodes(layer_index);
layer_ids_.push_back(tree_->GetNodes(layer_codes));
auto sampler_temp =
std::make_shared<paddle::operators::math::UniformSampler>(
layer_ids_[idx].size() - 1, seed_);
sampler_vec_.push_back(sampler_temp);
layer_index--;
idx++;
}
} }
std::vector<std::vector<uint64_t>> sample( std::vector<std::vector<uint64_t>> sample(
const std::vector<std::vector<uint64_t>>& user_inputs, const std::vector<std::vector<uint64_t>>& user_inputs,
...@@ -94,6 +112,8 @@ class LayerWiseSampler : public IndexSampler { ...@@ -94,6 +112,8 @@ class LayerWiseSampler : public IndexSampler {
std::shared_ptr<TreeIndex> tree_{nullptr}; std::shared_ptr<TreeIndex> tree_{nullptr};
int seed_{0}; int seed_{0};
int start_sample_layer_{1}; int start_sample_layer_{1};
std::vector<std::shared_ptr<paddle::operators::math::Sampler>> sampler_vec_;
std::vector<std::vector<IndexNode>> layer_ids_;
}; };
} // end namespace distributed } // end namespace distributed
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include "paddle/fluid/distributed/service/brpc_ps_server.h" #include "paddle/fluid/distributed/service/brpc_ps_server.h"
#include <thread> // NOLINT #include <thread> // NOLINT
#include "butil/object_pool.h"
#include "paddle/fluid/distributed/table/depends/sparse_utils.h" #include "paddle/fluid/distributed/table/depends/sparse_utils.h"
#include "paddle/fluid/distributed/table/table.h" #include "paddle/fluid/distributed/table/table.h"
#include "paddle/fluid/framework/archive.h" #include "paddle/fluid/framework/archive.h"
...@@ -196,12 +197,13 @@ int32_t BrpcPsService::pull_dense(Table *table, const PsRequestMessage &request, ...@@ -196,12 +197,13 @@ int32_t BrpcPsService::pull_dense(Table *table, const PsRequestMessage &request,
return 0; return 0;
} }
std::vector<float> res_data; auto res_data = butil::get_object<std::vector<float>>();
res_data.resize(num * table->value_accesor()->select_size() / sizeof(float)); res_data->resize(num * table->value_accesor()->select_size() / sizeof(float));
table->pull_dense(res_data.data(), num); table->pull_dense(res_data->data(), num);
cntl->response_attachment().append((char *)res_data.data(), cntl->response_attachment().append((char *)(res_data->data()),
res_data.size() * sizeof(float)); res_data->size() * sizeof(float));
butil::return_object(res_data);
return 0; return 0;
} }
...@@ -367,12 +369,13 @@ int32_t BrpcPsService::pull_sparse(Table *table, ...@@ -367,12 +369,13 @@ int32_t BrpcPsService::pull_sparse(Table *table,
value.DeserializeFromBytes(const_cast<void *>(data)); value.DeserializeFromBytes(const_cast<void *>(data));
std::vector<float> res_data; auto res_data = butil::get_object<std::vector<float>>();
res_data.resize(num * dim); res_data->resize(num * dim);
table->pull_sparse(res_data.data(), value); table->pull_sparse(res_data->data(), value);
cntl->response_attachment().append((char *)res_data.data(), cntl->response_attachment().append((char *)(res_data->data()),
res_data.size() * sizeof(float)); res_data->size() * sizeof(float));
butil::return_object(res_data);
return 0; return 0;
} }
......
...@@ -80,11 +80,11 @@ std::future<int32_t> GraphBrpcClient::get_node_feat( ...@@ -80,11 +80,11 @@ std::future<int32_t> GraphBrpcClient::get_node_feat(
[&, node_id_buckets, query_idx_buckets, request_call_num](void *done) { [&, node_id_buckets, query_idx_buckets, request_call_num](void *done) {
int ret = 0; int ret = 0;
auto *closure = (DownpourBrpcClosure *)done; auto *closure = (DownpourBrpcClosure *)done;
int fail_num = 0; size_t fail_num = 0;
for (int request_idx = 0; request_idx < request_call_num; for (int request_idx = 0; request_idx < request_call_num;
++request_idx) { ++request_idx) {
if (closure->check_response(request_idx, if (closure->check_response(request_idx, PS_GRAPH_GET_NODE_FEAT) !=
PS_GRAPH_SAMPLE_NEIGHBOORS) != 0) { 0) {
++fail_num; ++fail_num;
} else { } else {
auto &res_io_buffer = auto &res_io_buffer =
...@@ -144,6 +144,163 @@ std::future<int32_t> GraphBrpcClient::get_node_feat( ...@@ -144,6 +144,163 @@ std::future<int32_t> GraphBrpcClient::get_node_feat(
return fut; return fut;
} }
std::future<int32_t> GraphBrpcClient::clear_nodes(uint32_t table_id) {
DownpourBrpcClosure *closure = new DownpourBrpcClosure(
server_size, [&, server_size = this->server_size ](void *done) {
int ret = 0;
auto *closure = (DownpourBrpcClosure *)done;
size_t fail_num = 0;
for (size_t request_idx = 0; request_idx < server_size; ++request_idx) {
if (closure->check_response(request_idx, PS_GRAPH_CLEAR) != 0) {
++fail_num;
break;
}
}
ret = fail_num == 0 ? 0 : -1;
closure->set_promise_value(ret);
});
auto promise = std::make_shared<std::promise<int32_t>>();
closure->add_promise(promise);
std::future<int> fut = promise->get_future();
for (size_t i = 0; i < server_size; i++) {
int server_index = i;
closure->request(server_index)->set_cmd_id(PS_GRAPH_CLEAR);
closure->request(server_index)->set_table_id(table_id);
closure->request(server_index)->set_client_id(_client_id);
GraphPsService_Stub rpc_stub =
getServiceStub(get_cmd_channel(server_index));
closure->cntl(server_index)->set_log_id(butil::gettimeofday_ms());
rpc_stub.service(closure->cntl(server_index),
closure->request(server_index),
closure->response(server_index), closure);
}
return fut;
}
std::future<int32_t> GraphBrpcClient::add_graph_node(
uint32_t table_id, std::vector<uint64_t> &node_id_list,
std::vector<bool> &is_weighted_list) {
std::vector<std::vector<uint64_t>> request_bucket;
std::vector<std::vector<bool>> is_weighted_bucket;
bool add_weight = is_weighted_list.size() > 0;
std::vector<int> server_index_arr;
std::vector<int> index_mapping(server_size, -1);
for (size_t query_idx = 0; query_idx < node_id_list.size(); ++query_idx) {
int server_index = get_server_index_by_id(node_id_list[query_idx]);
if (index_mapping[server_index] == -1) {
index_mapping[server_index] = request_bucket.size();
server_index_arr.push_back(server_index);
request_bucket.push_back(std::vector<uint64_t>());
if (add_weight) is_weighted_bucket.push_back(std::vector<bool>());
}
request_bucket[index_mapping[server_index]].push_back(
node_id_list[query_idx]);
if (add_weight)
is_weighted_bucket[index_mapping[server_index]].push_back(
query_idx < is_weighted_list.size() ? is_weighted_list[query_idx]
: false);
}
size_t request_call_num = request_bucket.size();
DownpourBrpcClosure *closure = new DownpourBrpcClosure(
request_call_num, [&, request_call_num](void *done) {
int ret = 0;
auto *closure = (DownpourBrpcClosure *)done;
size_t fail_num = 0;
for (size_t request_idx = 0; request_idx < request_call_num;
++request_idx) {
if (closure->check_response(request_idx, PS_GRAPH_ADD_GRAPH_NODE) !=
0) {
++fail_num;
}
}
ret = fail_num == request_call_num ? -1 : 0;
closure->set_promise_value(ret);
});
auto promise = std::make_shared<std::promise<int32_t>>();
closure->add_promise(promise);
std::future<int> fut = promise->get_future();
for (size_t request_idx = 0; request_idx < request_call_num; ++request_idx) {
int server_index = server_index_arr[request_idx];
closure->request(request_idx)->set_cmd_id(PS_GRAPH_ADD_GRAPH_NODE);
closure->request(request_idx)->set_table_id(table_id);
closure->request(request_idx)->set_client_id(_client_id);
size_t node_num = request_bucket[request_idx].size();
closure->request(request_idx)
->add_params((char *)request_bucket[request_idx].data(),
sizeof(uint64_t) * node_num);
if (add_weight) {
bool weighted[is_weighted_bucket[request_idx].size() + 1];
for (size_t j = 0; j < is_weighted_bucket[request_idx].size(); j++)
weighted[j] = is_weighted_bucket[request_idx][j];
closure->request(request_idx)
->add_params((char *)weighted,
sizeof(bool) * is_weighted_bucket[request_idx].size());
}
// PsService_Stub rpc_stub(get_cmd_channel(server_index));
GraphPsService_Stub rpc_stub =
getServiceStub(get_cmd_channel(server_index));
closure->cntl(request_idx)->set_log_id(butil::gettimeofday_ms());
rpc_stub.service(closure->cntl(request_idx), closure->request(request_idx),
closure->response(request_idx), closure);
}
return fut;
}
std::future<int32_t> GraphBrpcClient::remove_graph_node(
uint32_t table_id, std::vector<uint64_t> &node_id_list) {
std::vector<std::vector<uint64_t>> request_bucket;
std::vector<int> server_index_arr;
std::vector<int> index_mapping(server_size, -1);
for (size_t query_idx = 0; query_idx < node_id_list.size(); ++query_idx) {
int server_index = get_server_index_by_id(node_id_list[query_idx]);
if (index_mapping[server_index] == -1) {
index_mapping[server_index] = request_bucket.size();
server_index_arr.push_back(server_index);
request_bucket.push_back(std::vector<uint64_t>());
}
request_bucket[index_mapping[server_index]].push_back(
node_id_list[query_idx]);
}
size_t request_call_num = request_bucket.size();
DownpourBrpcClosure *closure = new DownpourBrpcClosure(
request_call_num, [&, request_call_num](void *done) {
int ret = 0;
auto *closure = (DownpourBrpcClosure *)done;
int fail_num = 0;
for (size_t request_idx = 0; request_idx < request_call_num;
++request_idx) {
if (closure->check_response(request_idx,
PS_GRAPH_REMOVE_GRAPH_NODE) != 0) {
++fail_num;
}
}
ret = fail_num == request_call_num ? -1 : 0;
closure->set_promise_value(ret);
});
auto promise = std::make_shared<std::promise<int32_t>>();
closure->add_promise(promise);
std::future<int> fut = promise->get_future();
for (size_t request_idx = 0; request_idx < request_call_num; ++request_idx) {
int server_index = server_index_arr[request_idx];
closure->request(request_idx)->set_cmd_id(PS_GRAPH_REMOVE_GRAPH_NODE);
closure->request(request_idx)->set_table_id(table_id);
closure->request(request_idx)->set_client_id(_client_id);
size_t node_num = request_bucket[request_idx].size();
closure->request(request_idx)
->add_params((char *)request_bucket[request_idx].data(),
sizeof(uint64_t) * node_num);
// PsService_Stub rpc_stub(get_cmd_channel(server_index));
GraphPsService_Stub rpc_stub =
getServiceStub(get_cmd_channel(server_index));
closure->cntl(request_idx)->set_log_id(butil::gettimeofday_ms());
rpc_stub.service(closure->cntl(request_idx), closure->request(request_idx),
closure->response(request_idx), closure);
}
return fut;
}
// char* &buffer,int &actual_size // char* &buffer,int &actual_size
std::future<int32_t> GraphBrpcClient::batch_sample_neighboors( std::future<int32_t> GraphBrpcClient::batch_sample_neighboors(
uint32_t table_id, std::vector<uint64_t> node_ids, int sample_size, uint32_t table_id, std::vector<uint64_t> node_ids, int sample_size,
...@@ -174,8 +331,8 @@ std::future<int32_t> GraphBrpcClient::batch_sample_neighboors( ...@@ -174,8 +331,8 @@ std::future<int32_t> GraphBrpcClient::batch_sample_neighboors(
[&, node_id_buckets, query_idx_buckets, request_call_num](void *done) { [&, node_id_buckets, query_idx_buckets, request_call_num](void *done) {
int ret = 0; int ret = 0;
auto *closure = (DownpourBrpcClosure *)done; auto *closure = (DownpourBrpcClosure *)done;
int fail_num = 0; size_t fail_num = 0;
for (int request_idx = 0; request_idx < request_call_num; for (size_t request_idx = 0; request_idx < request_call_num;
++request_idx) { ++request_idx) {
if (closure->check_response(request_idx, if (closure->check_response(request_idx,
PS_GRAPH_SAMPLE_NEIGHBOORS) != 0) { PS_GRAPH_SAMPLE_NEIGHBOORS) != 0) {
...@@ -254,13 +411,14 @@ std::future<int32_t> GraphBrpcClient::random_sample_nodes( ...@@ -254,13 +411,14 @@ std::future<int32_t> GraphBrpcClient::random_sample_nodes(
auto &res_io_buffer = closure->cntl(0)->response_attachment(); auto &res_io_buffer = closure->cntl(0)->response_attachment();
butil::IOBufBytesIterator io_buffer_itr(res_io_buffer); butil::IOBufBytesIterator io_buffer_itr(res_io_buffer);
size_t bytes_size = io_buffer_itr.bytes_left(); size_t bytes_size = io_buffer_itr.bytes_left();
char buffer[bytes_size]; char *buffer = new char[bytes_size];
auto size = io_buffer_itr.copy_and_forward((void *)(buffer), bytes_size); auto size = io_buffer_itr.copy_and_forward((void *)(buffer), bytes_size);
int index = 0; int index = 0;
while (index < bytes_size) { while (index < bytes_size) {
ids.push_back(*(uint64_t *)(buffer + index)); ids.push_back(*(uint64_t *)(buffer + index));
index += GraphNode::id_size; index += GraphNode::id_size;
} }
delete[] buffer;
} }
closure->set_promise_value(ret); closure->set_promise_value(ret);
}); });
...@@ -292,7 +450,7 @@ std::future<int32_t> GraphBrpcClient::pull_graph_list( ...@@ -292,7 +450,7 @@ std::future<int32_t> GraphBrpcClient::pull_graph_list(
auto &res_io_buffer = closure->cntl(0)->response_attachment(); auto &res_io_buffer = closure->cntl(0)->response_attachment();
butil::IOBufBytesIterator io_buffer_itr(res_io_buffer); butil::IOBufBytesIterator io_buffer_itr(res_io_buffer);
size_t bytes_size = io_buffer_itr.bytes_left(); size_t bytes_size = io_buffer_itr.bytes_left();
char buffer[bytes_size]; char *buffer = new char[bytes_size];
io_buffer_itr.copy_and_forward((void *)(buffer), bytes_size); io_buffer_itr.copy_and_forward((void *)(buffer), bytes_size);
int index = 0; int index = 0;
while (index < bytes_size) { while (index < bytes_size) {
...@@ -301,6 +459,7 @@ std::future<int32_t> GraphBrpcClient::pull_graph_list( ...@@ -301,6 +459,7 @@ std::future<int32_t> GraphBrpcClient::pull_graph_list(
index += node.get_size(false); index += node.get_size(false);
res.push_back(node); res.push_back(node);
} }
delete buffer;
} }
closure->set_promise_value(ret); closure->set_promise_value(ret);
}); });
......
...@@ -78,6 +78,13 @@ class GraphBrpcClient : public BrpcPsClient { ...@@ -78,6 +78,13 @@ class GraphBrpcClient : public BrpcPsClient {
const uint32_t& table_id, const std::vector<uint64_t>& node_ids, const uint32_t& table_id, const std::vector<uint64_t>& node_ids,
const std::vector<std::string>& feature_names, const std::vector<std::string>& feature_names,
std::vector<std::vector<std::string>>& res); std::vector<std::vector<std::string>>& res);
virtual std::future<int32_t> clear_nodes(uint32_t table_id);
virtual std::future<int32_t> add_graph_node(
uint32_t table_id, std::vector<uint64_t>& node_id_list,
std::vector<bool>& is_weighted_list);
virtual std::future<int32_t> remove_graph_node(
uint32_t table_id, std::vector<uint64_t>& node_id_list);
virtual int32_t initialize(); virtual int32_t initialize();
int get_shard_num() { return shard_num; } int get_shard_num() { return shard_num; }
void set_shard_num(int shard_num) { this->shard_num = shard_num; } void set_shard_num(int shard_num) { this->shard_num = shard_num; }
......
...@@ -24,6 +24,14 @@ ...@@ -24,6 +24,14 @@
namespace paddle { namespace paddle {
namespace distributed { namespace distributed {
#define CHECK_TABLE_EXIST(table, request, response) \
if (table == NULL) { \
std::string err_msg("table not found with table_id:"); \
err_msg.append(std::to_string(request.table_id())); \
set_response_code(response, -1, err_msg.c_str()); \
return -1; \
}
int32_t GraphBrpcServer::initialize() { int32_t GraphBrpcServer::initialize() {
auto &service_config = _config.downpour_server_param().service_param(); auto &service_config = _config.downpour_server_param().service_param();
if (!service_config.has_service_class()) { if (!service_config.has_service_class()) {
...@@ -71,6 +79,58 @@ uint64_t GraphBrpcServer::start(const std::string &ip, uint32_t port) { ...@@ -71,6 +79,58 @@ uint64_t GraphBrpcServer::start(const std::string &ip, uint32_t port) {
return 0; return 0;
} }
int32_t GraphBrpcService::clear_nodes(Table *table,
const PsRequestMessage &request,
PsResponseMessage &response,
brpc::Controller *cntl) {
((GraphTable *)table)->clear_nodes();
return 0;
}
int32_t GraphBrpcService::add_graph_node(Table *table,
const PsRequestMessage &request,
PsResponseMessage &response,
brpc::Controller *cntl) {
CHECK_TABLE_EXIST(table, request, response)
if (request.params_size() < 1) {
set_response_code(
response, -1,
"graph_get_node_feat request requires at least 2 arguments");
return 0;
}
size_t node_num = request.params(0).size() / sizeof(uint64_t);
uint64_t *node_data = (uint64_t *)(request.params(0).c_str());
std::vector<uint64_t> node_ids(node_data, node_data + node_num);
std::vector<bool> is_weighted_list;
if (request.params_size() == 2) {
size_t weight_list_size = request.params(1).size() / sizeof(bool);
bool *is_weighted_buffer = (bool *)(request.params(1).c_str());
is_weighted_list = std::vector<bool>(is_weighted_buffer,
is_weighted_buffer + weight_list_size);
}
((GraphTable *)table)->add_graph_node(node_ids, is_weighted_list);
return 0;
}
int32_t GraphBrpcService::remove_graph_node(Table *table,
const PsRequestMessage &request,
PsResponseMessage &response,
brpc::Controller *cntl) {
CHECK_TABLE_EXIST(table, request, response)
if (request.params_size() < 1) {
set_response_code(
response, -1,
"graph_get_node_feat request requires at least 1 argument");
return 0;
}
size_t node_num = request.params(0).size() / sizeof(uint64_t);
uint64_t *node_data = (uint64_t *)(request.params(0).c_str());
std::vector<uint64_t> node_ids(node_data, node_data + node_num);
((GraphTable *)table)->remove_graph_node(node_ids);
return 0;
}
int32_t GraphBrpcServer::port() { return _server.listen_address().port; } int32_t GraphBrpcServer::port() { return _server.listen_address().port; }
int32_t GraphBrpcService::initialize() { int32_t GraphBrpcService::initialize() {
...@@ -92,21 +152,17 @@ int32_t GraphBrpcService::initialize() { ...@@ -92,21 +152,17 @@ int32_t GraphBrpcService::initialize() {
&GraphBrpcService::graph_random_sample_nodes; &GraphBrpcService::graph_random_sample_nodes;
_service_handler_map[PS_GRAPH_GET_NODE_FEAT] = _service_handler_map[PS_GRAPH_GET_NODE_FEAT] =
&GraphBrpcService::graph_get_node_feat; &GraphBrpcService::graph_get_node_feat;
_service_handler_map[PS_GRAPH_CLEAR] = &GraphBrpcService::clear_nodes;
_service_handler_map[PS_GRAPH_ADD_GRAPH_NODE] =
&GraphBrpcService::add_graph_node;
_service_handler_map[PS_GRAPH_REMOVE_GRAPH_NODE] =
&GraphBrpcService::remove_graph_node;
// shard初始化,server启动后才可从env获取到server_list的shard信息 // shard初始化,server启动后才可从env获取到server_list的shard信息
initialize_shard_info(); initialize_shard_info();
return 0; return 0;
} }
#define CHECK_TABLE_EXIST(table, request, response) \
if (table == NULL) { \
std::string err_msg("table not found with table_id:"); \
err_msg.append(std::to_string(request.table_id())); \
set_response_code(response, -1, err_msg.c_str()); \
return -1; \
}
int32_t GraphBrpcService::initialize_shard_info() { int32_t GraphBrpcService::initialize_shard_info() {
if (!_is_initialize_shard_info) { if (!_is_initialize_shard_info) {
std::lock_guard<std::mutex> guard(_initialize_shard_mutex); std::lock_guard<std::mutex> guard(_initialize_shard_mutex);
......
...@@ -86,6 +86,13 @@ class GraphBrpcService : public PsBaseService { ...@@ -86,6 +86,13 @@ class GraphBrpcService : public PsBaseService {
int32_t graph_get_node_feat(Table *table, const PsRequestMessage &request, int32_t graph_get_node_feat(Table *table, const PsRequestMessage &request,
PsResponseMessage &response, PsResponseMessage &response,
brpc::Controller *cntl); brpc::Controller *cntl);
int32_t clear_nodes(Table *table, const PsRequestMessage &request,
PsResponseMessage &response, brpc::Controller *cntl);
int32_t add_graph_node(Table *table, const PsRequestMessage &request,
PsResponseMessage &response, brpc::Controller *cntl);
int32_t remove_graph_node(Table *table, const PsRequestMessage &request,
PsResponseMessage &response,
brpc::Controller *cntl);
int32_t barrier(Table *table, const PsRequestMessage &request, int32_t barrier(Table *table, const PsRequestMessage &request,
PsResponseMessage &response, brpc::Controller *cntl); PsResponseMessage &response, brpc::Controller *cntl);
int32_t load_one_table(Table *table, const PsRequestMessage &request, int32_t load_one_table(Table *table, const PsRequestMessage &request,
......
...@@ -44,6 +44,9 @@ void GraphPyService::add_table_feat_conf(std::string table_name, ...@@ -44,6 +44,9 @@ void GraphPyService::add_table_feat_conf(std::string table_name,
} }
} }
void add_graph_node(std::vector<uint64_t> node_ids,
std::vector<bool> weight_list) {}
void remove_graph_node(std::vector<uint64_t> node_ids) {}
void GraphPyService::set_up(std::string ips_str, int shard_num, void GraphPyService::set_up(std::string ips_str, int shard_num,
std::vector<std::string> node_types, std::vector<std::string> node_types,
std::vector<std::string> edge_types) { std::vector<std::string> edge_types) {
...@@ -247,6 +250,34 @@ void GraphPyClient::load_edge_file(std::string name, std::string filepath, ...@@ -247,6 +250,34 @@ void GraphPyClient::load_edge_file(std::string name, std::string filepath,
} }
} }
void GraphPyClient::clear_nodes(std::string name) {
if (this->table_id_map.count(name)) {
uint32_t table_id = this->table_id_map[name];
auto status = get_ps_client()->clear_nodes(table_id);
status.wait();
}
}
void GraphPyClient::add_graph_node(std::string name,
std::vector<uint64_t>& node_ids,
std::vector<bool>& weight_list) {
if (this->table_id_map.count(name)) {
uint32_t table_id = this->table_id_map[name];
auto status =
get_ps_client()->add_graph_node(table_id, node_ids, weight_list);
status.wait();
}
}
void GraphPyClient::remove_graph_node(std::string name,
std::vector<uint64_t>& node_ids) {
if (this->table_id_map.count(name)) {
uint32_t table_id = this->table_id_map[name];
auto status = get_ps_client()->remove_graph_node(table_id, node_ids);
status.wait();
}
}
void GraphPyClient::load_node_file(std::string name, std::string filepath) { void GraphPyClient::load_node_file(std::string name, std::string filepath) {
// 'n' means load nodes and 'node_type' follows // 'n' means load nodes and 'node_type' follows
std::string params = "n" + name; std::string params = "n" + name;
......
...@@ -141,6 +141,10 @@ class GraphPyClient : public GraphPyService { ...@@ -141,6 +141,10 @@ class GraphPyClient : public GraphPyService {
void finalize_worker(); void finalize_worker();
void load_edge_file(std::string name, std::string filepath, bool reverse); void load_edge_file(std::string name, std::string filepath, bool reverse);
void load_node_file(std::string name, std::string filepath); void load_node_file(std::string name, std::string filepath);
void clear_nodes(std::string name);
void add_graph_node(std::string name, std::vector<uint64_t>& node_ids,
std::vector<bool>& weight_list);
void remove_graph_node(std::string name, std::vector<uint64_t>& node_ids);
int get_client_id() { return client_id; } int get_client_id() { return client_id; }
void set_client_id(int client_id) { this->client_id = client_id; } void set_client_id(int client_id) { this->client_id = client_id; }
void start_client(); void start_client();
......
...@@ -42,17 +42,17 @@ int32_t PsLocalClient::initialize() { ...@@ -42,17 +42,17 @@ int32_t PsLocalClient::initialize() {
::std::future<int32_t> PsLocalClient::load(const std::string& epoch, ::std::future<int32_t> PsLocalClient::load(const std::string& epoch,
const std::string& mode) { const std::string& mode) {
// TODO // TODO
// for (auto& it : _table_map) { for (auto& it : _table_map) {
// load(it.first, epoch, mode); load(it.first, epoch, mode);
//} }
return done(); return done();
} }
::std::future<int32_t> PsLocalClient::load(uint32_t table_id, ::std::future<int32_t> PsLocalClient::load(uint32_t table_id,
const std::string& epoch, const std::string& epoch,
const std::string& mode) { const std::string& mode) {
// TODO // TODO
// auto* table_ptr = table(table_id); auto* table_ptr = table(table_id);
// table_ptr->load(epoch, mode); table_ptr->load(epoch, mode);
return done(); return done();
} }
...@@ -245,7 +245,6 @@ int32_t PsLocalClient::initialize() { ...@@ -245,7 +245,6 @@ int32_t PsLocalClient::initialize() {
::std::future<int32_t> PsLocalClient::push_sparse_raw_gradient( ::std::future<int32_t> PsLocalClient::push_sparse_raw_gradient(
size_t table_id, const uint64_t* keys, const float** update_values, size_t table_id, const uint64_t* keys, const float** update_values,
size_t num, void* callback) { size_t num, void* callback) {
VLOG(1) << "wxx push_sparse_raw_gradient";
PSClientClosure* closure = reinterpret_cast<PSClientClosure*>(callback); PSClientClosure* closure = reinterpret_cast<PSClientClosure*>(callback);
auto* accessor = table_accessor(table_id); auto* accessor = table_accessor(table_id);
auto* table_ptr = table(table_id); auto* table_ptr = table(table_id);
......
...@@ -26,9 +26,14 @@ class PsLocalServer : public PSServer { ...@@ -26,9 +26,14 @@ class PsLocalServer : public PSServer {
PsLocalServer() {} PsLocalServer() {}
virtual ~PsLocalServer() {} virtual ~PsLocalServer() {}
virtual uint64_t start() { return 0; } virtual uint64_t start() { return 0; }
virtual uint64_t start(const std::string& ip, uint32_t port) { return 0; } virtual uint64_t start(const std::string &ip, uint32_t port) { return 0; }
virtual int32_t stop() { return 0; } virtual int32_t stop() { return 0; }
virtual int32_t port() { return 0; } virtual int32_t port() { return 0; }
virtual int32_t configure(
const PSParameter &config, PSEnvironment &env, size_t server_rank,
const std::vector<framework::ProgramDesc> &server_sub_program = {}) {
return 0;
}
private: private:
virtual int32_t initialize() { return 0; } virtual int32_t initialize() { return 0; }
......
...@@ -52,6 +52,9 @@ enum PsCmdID { ...@@ -52,6 +52,9 @@ enum PsCmdID {
PS_GRAPH_SAMPLE_NEIGHBOORS = 31; PS_GRAPH_SAMPLE_NEIGHBOORS = 31;
PS_GRAPH_SAMPLE_NODES = 32; PS_GRAPH_SAMPLE_NODES = 32;
PS_GRAPH_GET_NODE_FEAT = 33; PS_GRAPH_GET_NODE_FEAT = 33;
PS_GRAPH_CLEAR = 34;
PS_GRAPH_ADD_GRAPH_NODE = 35;
PS_GRAPH_REMOVE_GRAPH_NODE = 36;
} }
message PsRequestMessage { message PsRequestMessage {
......
...@@ -70,7 +70,7 @@ class PSServer { ...@@ -70,7 +70,7 @@ class PSServer {
virtual int32_t configure( virtual int32_t configure(
const PSParameter &config, PSEnvironment &env, size_t server_rank, const PSParameter &config, PSEnvironment &env, size_t server_rank,
const std::vector<framework::ProgramDesc> &server_sub_program = {}) final; const std::vector<framework::ProgramDesc> &server_sub_program = {});
// return server_ip // return server_ip
virtual std::string ip() { return butil::my_ip_cstr(); } virtual std::string ip() { return butil::my_ip_cstr(); }
......
...@@ -9,11 +9,24 @@ set_source_files_properties(${graphDir}/graph_node.cc PROPERTIES COMPILE_FLAGS $ ...@@ -9,11 +9,24 @@ set_source_files_properties(${graphDir}/graph_node.cc PROPERTIES COMPILE_FLAGS $
cc_library(graph_node SRCS ${graphDir}/graph_node.cc DEPS WeightedSampler) cc_library(graph_node SRCS ${graphDir}/graph_node.cc DEPS WeightedSampler)
set_source_files_properties(common_dense_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(common_dense_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(common_sparse_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(common_sparse_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(ssd_sparse_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(sparse_geo_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(sparse_geo_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(barrier_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(barrier_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(common_graph_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(common_graph_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
cc_library(common_table SRCS common_sparse_table.cc common_dense_table.cc sparse_geo_table.cc barrier_table.cc common_graph_table.cc DEPS ${TABLE_DEPS} graph_edge graph_node device_context string_helper simple_threadpool xxhash generator) get_property(RPC_DEPS GLOBAL PROPERTY RPC_DEPS)
set(EXTERN_DEP "")
if(WITH_HETERPS)
set(TABLE_SRC common_sparse_table.cc ssd_sparse_table.cc common_dense_table.cc sparse_geo_table.cc barrier_table.cc common_graph_table.cc)
set(EXTERN_DEP rocksdb)
else()
set(TABLE_SRC common_sparse_table.cc common_dense_table.cc sparse_geo_table.cc barrier_table.cc common_graph_table.cc)
endif()
cc_library(common_table SRCS ${TABLE_SRC} DEPS ${TABLE_DEPS}
${RPC_DEPS} graph_edge graph_node device_context string_helper
simple_threadpool xxhash generator ${EXTERN_DEP})
set_source_files_properties(tensor_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(tensor_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(tensor_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(tensor_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
......
...@@ -15,12 +15,15 @@ ...@@ -15,12 +15,15 @@
#include "paddle/fluid/distributed/table/common_graph_table.h" #include "paddle/fluid/distributed/table/common_graph_table.h"
#include <time.h> #include <time.h>
#include <algorithm> #include <algorithm>
#include <chrono>
#include <set> #include <set>
#include <sstream> #include <sstream>
#include "paddle/fluid/distributed/common/utils.h" #include "paddle/fluid/distributed/common/utils.h"
#include "paddle/fluid/distributed/table/graph/graph_node.h" #include "paddle/fluid/distributed/table/graph/graph_node.h"
#include "paddle/fluid/framework/generator.h"
#include "paddle/fluid/string/printf.h" #include "paddle/fluid/string/printf.h"
#include "paddle/fluid/string/string_helper.h" #include "paddle/fluid/string/string_helper.h"
namespace paddle { namespace paddle {
namespace distributed { namespace distributed {
...@@ -35,6 +38,77 @@ std::vector<Node *> GraphShard::get_batch(int start, int end, int step) { ...@@ -35,6 +38,77 @@ std::vector<Node *> GraphShard::get_batch(int start, int end, int step) {
size_t GraphShard::get_size() { return bucket.size(); } size_t GraphShard::get_size() { return bucket.size(); }
int32_t GraphTable::add_graph_node(std::vector<uint64_t> &id_list,
std::vector<bool> &is_weight_list) {
size_t node_size = id_list.size();
std::vector<std::vector<std::pair<uint64_t, bool>>> batch(task_pool_size_);
for (size_t i = 0; i < node_size; i++) {
size_t shard_id = id_list[i] % shard_num;
if (shard_id >= shard_end || shard_id < shard_start) {
continue;
}
batch[get_thread_pool_index(id_list[i])].push_back(
{id_list[i], i < is_weight_list.size() ? is_weight_list[i] : false});
}
std::vector<std::future<int>> tasks;
for (size_t i = 0; i < batch.size(); ++i) {
if (!batch[i].size()) continue;
tasks.push_back(_shards_task_pool[i]->enqueue([&batch, i, this]() -> int {
for (auto &p : batch[i]) {
size_t index = p.first % this->shard_num - this->shard_start;
this->shards[index].add_graph_node(p.first)->build_edges(p.second);
}
return 0;
}));
}
for (size_t i = 0; i < tasks.size(); i++) tasks[i].get();
return 0;
}
int32_t GraphTable::remove_graph_node(std::vector<uint64_t> &id_list) {
size_t node_size = id_list.size();
std::vector<std::vector<uint64_t>> batch(task_pool_size_);
for (size_t i = 0; i < node_size; i++) {
size_t shard_id = id_list[i] % shard_num;
if (shard_id >= shard_end || shard_id < shard_start) continue;
batch[get_thread_pool_index(id_list[i])].push_back(id_list[i]);
}
std::vector<std::future<int>> tasks;
for (size_t i = 0; i < batch.size(); ++i) {
if (!batch[i].size()) continue;
tasks.push_back(_shards_task_pool[i]->enqueue([&batch, i, this]() -> int {
for (auto &p : batch[i]) {
size_t index = p % this->shard_num - this->shard_start;
this->shards[index].delete_node(p);
}
return 0;
}));
}
for (size_t i = 0; i < tasks.size(); i++) tasks[i].get();
return 0;
}
void GraphShard::clear() {
for (size_t i = 0; i < bucket.size(); i++) {
delete bucket[i];
}
bucket.clear();
node_location.clear();
}
GraphShard::~GraphShard() { clear(); }
void GraphShard::delete_node(uint64_t id) {
auto iter = node_location.find(id);
if (iter == node_location.end()) return;
int pos = iter->second;
delete bucket[pos];
if (pos != (int)bucket.size() - 1) {
bucket[pos] = bucket.back();
node_location[bucket.back()->get_id()] = pos;
}
node_location.erase(id);
bucket.pop_back();
}
GraphNode *GraphShard::add_graph_node(uint64_t id) { GraphNode *GraphShard::add_graph_node(uint64_t id) {
if (node_location.find(id) == node_location.end()) { if (node_location.find(id) == node_location.end()) {
node_location[id] = bucket.size(); node_location[id] = bucket.size();
...@@ -79,11 +153,7 @@ int32_t GraphTable::get_nodes_ids_by_ranges( ...@@ -79,11 +153,7 @@ int32_t GraphTable::get_nodes_ids_by_ranges(
int start = 0, end, index = 0, total_size = 0; int start = 0, end, index = 0, total_size = 0;
res.clear(); res.clear();
std::vector<std::future<std::vector<uint64_t>>> tasks; std::vector<std::future<std::vector<uint64_t>>> tasks;
// std::string temp = ""; for (size_t i = 0; i < shards.size() && index < (int)ranges.size(); i++) {
// for(int i = 0;i < shards.size();i++)
// temp+= std::to_string((int)shards[i].get_size()) + " ";
// VLOG(0)<<"range distribution "<<temp;
for (int i = 0; i < shards.size() && index < ranges.size(); i++) {
end = total_size + shards[i].get_size(); end = total_size + shards[i].get_size();
start = total_size; start = total_size;
while (start < end && index < ranges.size()) { while (start < end && index < ranges.size()) {
...@@ -97,7 +167,6 @@ int32_t GraphTable::get_nodes_ids_by_ranges( ...@@ -97,7 +167,6 @@ int32_t GraphTable::get_nodes_ids_by_ranges(
start = second; start = second;
first -= total_size; first -= total_size;
second -= total_size; second -= total_size;
// VLOG(0)<<" FIND RANGE "<<i<<" "<<first<<" "<<second;
tasks.push_back(_shards_task_pool[i % task_pool_size_]->enqueue( tasks.push_back(_shards_task_pool[i % task_pool_size_]->enqueue(
[this, first, second, i]() -> std::vector<uint64_t> { [this, first, second, i]() -> std::vector<uint64_t> {
return shards[i].get_ids_by_range(first, second); return shards[i].get_ids_by_range(first, second);
...@@ -106,7 +175,7 @@ int32_t GraphTable::get_nodes_ids_by_ranges( ...@@ -106,7 +175,7 @@ int32_t GraphTable::get_nodes_ids_by_ranges(
} }
total_size += shards[i].get_size(); total_size += shards[i].get_size();
} }
for (int i = 0; i < tasks.size(); i++) { for (size_t i = 0; i < tasks.size(); i++) {
auto vec = tasks[i].get(); auto vec = tasks[i].get();
for (auto &id : vec) { for (auto &id : vec) {
res.push_back(id); res.push_back(id);
...@@ -219,7 +288,7 @@ int32_t GraphTable::load_edges(const std::string &path, bool reverse_edge) { ...@@ -219,7 +288,7 @@ int32_t GraphTable::load_edges(const std::string &path, bool reverse_edge) {
for (auto &shard : shards) { for (auto &shard : shards) {
auto bucket = shard.get_bucket(); auto bucket = shard.get_bucket();
for (int i = 0; i < bucket.size(); i++) { for (size_t i = 0; i < bucket.size(); i++) {
bucket[i]->build_sampler(sample_type); bucket[i]->build_sampler(sample_type);
} }
} }
...@@ -238,10 +307,29 @@ Node *GraphTable::find_node(uint64_t id) { ...@@ -238,10 +307,29 @@ Node *GraphTable::find_node(uint64_t id) {
uint32_t GraphTable::get_thread_pool_index(uint64_t node_id) { uint32_t GraphTable::get_thread_pool_index(uint64_t node_id) {
return node_id % shard_num % shard_num_per_table % task_pool_size_; return node_id % shard_num % shard_num_per_table % task_pool_size_;
} }
uint32_t GraphTable::get_thread_pool_index_by_shard_index(
uint64_t shard_index) {
return shard_index % shard_num_per_table % task_pool_size_;
}
int32_t GraphTable::clear_nodes() {
std::vector<std::future<int>> tasks;
for (size_t i = 0; i < shards.size(); i++) {
tasks.push_back(
_shards_task_pool[get_thread_pool_index_by_shard_index(i)]->enqueue(
[this, i]() -> int {
this->shards[i].clear();
return 0;
}));
}
for (size_t i = 0; i < tasks.size(); i++) tasks[i].get();
return 0;
}
int32_t GraphTable::random_sample_nodes(int sample_size, int32_t GraphTable::random_sample_nodes(int sample_size,
std::unique_ptr<char[]> &buffer, std::unique_ptr<char[]> &buffer,
int &actual_size) { int &actual_size) {
bool need_feature = false;
int total_size = 0; int total_size = 0;
for (int i = 0; i < shards.size(); i++) { for (int i = 0; i < shards.size(); i++) {
total_size += shards[i].get_size(); total_size += shards[i].get_size();
...@@ -281,7 +369,7 @@ int32_t GraphTable::random_sample_nodes(int sample_size, ...@@ -281,7 +369,7 @@ int32_t GraphTable::random_sample_nodes(int sample_size,
} }
std::vector<std::pair<int, int>> first_half, second_half; std::vector<std::pair<int, int>> first_half, second_half;
int start_index = rand() % total_size; int start_index = rand() % total_size;
for (int i = 0; i < ranges_len.size() && i < ranges_pos.size(); i++) { for (size_t i = 0; i < ranges_len.size() && i < ranges_pos.size(); i++) {
if (ranges_pos[i] + ranges_len[i] - 1 + start_index < total_size) if (ranges_pos[i] + ranges_len[i] - 1 + start_index < total_size)
first_half.push_back({ranges_pos[i] + start_index, first_half.push_back({ranges_pos[i] + start_index,
ranges_pos[i] + ranges_len[i] + start_index}); ranges_pos[i] + ranges_len[i] + start_index});
...@@ -314,31 +402,34 @@ int32_t GraphTable::random_sample_neighboors( ...@@ -314,31 +402,34 @@ int32_t GraphTable::random_sample_neighboors(
uint64_t &node_id = node_ids[idx]; uint64_t &node_id = node_ids[idx];
std::unique_ptr<char[]> &buffer = buffers[idx]; std::unique_ptr<char[]> &buffer = buffers[idx];
int &actual_size = actual_sizes[idx]; int &actual_size = actual_sizes[idx];
tasks.push_back(_shards_task_pool[get_thread_pool_index(node_id)]->enqueue(
[&]() -> int {
Node *node = find_node(node_id);
if (node == nullptr) { int thread_pool_index = get_thread_pool_index(node_id);
actual_size = 0; auto rng = _shards_task_rng_pool[thread_pool_index];
return 0;
} tasks.push_back(_shards_task_pool[thread_pool_index]->enqueue([&]() -> int {
std::vector<int> res = node->sample_k(sample_size); Node *node = find_node(node_id);
actual_size = res.size() * (Node::id_size + Node::weight_size);
int offset = 0; if (node == nullptr) {
uint64_t id; actual_size = 0;
float weight; return 0;
char *buffer_addr = new char[actual_size]; }
buffer.reset(buffer_addr); std::vector<int> res = node->sample_k(sample_size, rng);
for (int &x : res) { actual_size = res.size() * (Node::id_size + Node::weight_size);
id = node->get_neighbor_id(x); int offset = 0;
weight = node->get_neighbor_weight(x); uint64_t id;
memcpy(buffer_addr + offset, &id, Node::id_size); float weight;
offset += Node::id_size; char *buffer_addr = new char[actual_size];
memcpy(buffer_addr + offset, &weight, Node::weight_size); buffer.reset(buffer_addr);
offset += Node::weight_size; for (int &x : res) {
} id = node->get_neighbor_id(x);
return 0; weight = node->get_neighbor_weight(x);
})); memcpy(buffer_addr + offset, &id, Node::id_size);
offset += Node::id_size;
memcpy(buffer_addr + offset, &weight, Node::weight_size);
offset += Node::weight_size;
}
return 0;
}));
} }
for (size_t idx = 0; idx < node_num; ++idx) { for (size_t idx = 0; idx < node_num; ++idx) {
tasks[idx].get(); tasks[idx].get();
...@@ -386,7 +477,6 @@ std::pair<int32_t, std::string> GraphTable::parse_feature( ...@@ -386,7 +477,6 @@ std::pair<int32_t, std::string> GraphTable::parse_feature(
if (this->feat_id_map.count(fields[0])) { if (this->feat_id_map.count(fields[0])) {
int32_t id = this->feat_id_map[fields[0]]; int32_t id = this->feat_id_map[fields[0]];
std::string dtype = this->feat_dtype[id]; std::string dtype = this->feat_dtype[id];
int32_t shape = this->feat_shape[id];
std::vector<std::string> values(fields.begin() + 1, fields.end()); std::vector<std::string> values(fields.begin() + 1, fields.end());
if (dtype == "feasign") { if (dtype == "feasign") {
return std::make_pair<int32_t, std::string>( return std::make_pair<int32_t, std::string>(
...@@ -428,7 +518,6 @@ int32_t GraphTable::pull_graph_list(int start, int total_size, ...@@ -428,7 +518,6 @@ int32_t GraphTable::pull_graph_list(int start, int total_size,
int end = start + (count - 1) * step + 1; int end = start + (count - 1) * step + 1;
tasks.push_back(_shards_task_pool[i % task_pool_size_]->enqueue( tasks.push_back(_shards_task_pool[i % task_pool_size_]->enqueue(
[this, i, start, end, step, size]() -> std::vector<Node *> { [this, i, start, end, step, size]() -> std::vector<Node *> {
return this->shards[i].get_batch(start - size, end - size, step); return this->shards[i].get_batch(start - size, end - size, step);
})); }));
start += count * step; start += count * step;
...@@ -462,6 +551,7 @@ int32_t GraphTable::initialize() { ...@@ -462,6 +551,7 @@ int32_t GraphTable::initialize() {
_shards_task_pool.resize(task_pool_size_); _shards_task_pool.resize(task_pool_size_);
for (size_t i = 0; i < _shards_task_pool.size(); ++i) { for (size_t i = 0; i < _shards_task_pool.size(); ++i) {
_shards_task_pool[i].reset(new ::ThreadPool(1)); _shards_task_pool[i].reset(new ::ThreadPool(1));
_shards_task_rng_pool.push_back(paddle::framework::GetCPURandomEngine(0));
} }
server_num = _shard_num; server_num = _shard_num;
// VLOG(0) << "in init graph table server num = " << server_num; // VLOG(0) << "in init graph table server num = " << server_num;
...@@ -502,5 +592,5 @@ int32_t GraphTable::initialize() { ...@@ -502,5 +592,5 @@ int32_t GraphTable::initialize() {
shards = std::vector<GraphShard>(shard_num_per_table, GraphShard(shard_num)); shards = std::vector<GraphShard>(shard_num_per_table, GraphShard(shard_num));
return 0; return 0;
} }
} } // namespace distributed
}; }; // namespace paddle
...@@ -36,11 +36,12 @@ class GraphShard { ...@@ -36,11 +36,12 @@ class GraphShard {
size_t get_size(); size_t get_size();
GraphShard() {} GraphShard() {}
GraphShard(int shard_num) { this->shard_num = shard_num; } GraphShard(int shard_num) { this->shard_num = shard_num; }
~GraphShard();
std::vector<Node *> &get_bucket() { return bucket; } std::vector<Node *> &get_bucket() { return bucket; }
std::vector<Node *> get_batch(int start, int end, int step); std::vector<Node *> get_batch(int start, int end, int step);
std::vector<uint64_t> get_ids_by_range(int start, int end) { std::vector<uint64_t> get_ids_by_range(int start, int end) {
std::vector<uint64_t> res; std::vector<uint64_t> res;
for (int i = start; i < end && i < bucket.size(); i++) { for (int i = start; i < end && i < (int)bucket.size(); i++) {
res.push_back(bucket[i]->get_id()); res.push_back(bucket[i]->get_id());
} }
return res; return res;
...@@ -48,6 +49,8 @@ class GraphShard { ...@@ -48,6 +49,8 @@ class GraphShard {
GraphNode *add_graph_node(uint64_t id); GraphNode *add_graph_node(uint64_t id);
FeatureNode *add_feature_node(uint64_t id); FeatureNode *add_feature_node(uint64_t id);
Node *find_node(uint64_t id); Node *find_node(uint64_t id);
void delete_node(uint64_t id);
void clear();
void add_neighboor(uint64_t id, uint64_t dst_id, float weight); void add_neighboor(uint64_t id, uint64_t dst_id, float weight);
std::unordered_map<uint64_t, int> get_node_location() { std::unordered_map<uint64_t, int> get_node_location() {
return node_location; return node_location;
...@@ -85,6 +88,11 @@ class GraphTable : public SparseTable { ...@@ -85,6 +88,11 @@ class GraphTable : public SparseTable {
int32_t load_nodes(const std::string &path, std::string node_type); int32_t load_nodes(const std::string &path, std::string node_type);
int32_t add_graph_node(std::vector<uint64_t> &id_list,
std::vector<bool> &is_weight_list);
int32_t remove_graph_node(std::vector<uint64_t> &id_list);
Node *find_node(uint64_t id); Node *find_node(uint64_t id);
virtual int32_t pull_sparse(float *values, virtual int32_t pull_sparse(float *values,
...@@ -97,6 +105,7 @@ class GraphTable : public SparseTable { ...@@ -97,6 +105,7 @@ class GraphTable : public SparseTable {
return 0; return 0;
} }
virtual int32_t clear_nodes();
virtual void clear() {} virtual void clear() {}
virtual int32_t flush() { return 0; } virtual int32_t flush() { return 0; }
virtual int32_t shrink(const std::string &param) { return 0; } virtual int32_t shrink(const std::string &param) { return 0; }
...@@ -105,6 +114,7 @@ class GraphTable : public SparseTable { ...@@ -105,6 +114,7 @@ class GraphTable : public SparseTable {
return 0; return 0;
} }
virtual int32_t initialize_shard() { return 0; } virtual int32_t initialize_shard() { return 0; }
virtual uint32_t get_thread_pool_index_by_shard_index(uint64_t shard_index);
virtual uint32_t get_thread_pool_index(uint64_t node_id); virtual uint32_t get_thread_pool_index(uint64_t node_id);
virtual std::pair<int32_t, std::string> parse_feature(std::string feat_str); virtual std::pair<int32_t, std::string> parse_feature(std::string feat_str);
...@@ -126,6 +136,8 @@ class GraphTable : public SparseTable { ...@@ -126,6 +136,8 @@ class GraphTable : public SparseTable {
std::string table_type; std::string table_type;
std::vector<std::shared_ptr<::ThreadPool>> _shards_task_pool; std::vector<std::shared_ptr<::ThreadPool>> _shards_task_pool;
std::vector<std::shared_ptr<std::mt19937_64>> _shards_task_rng_pool;
}; };
} // namespace distributed } // namespace distributed
}; // namespace paddle }; // namespace paddle
...@@ -13,9 +13,9 @@ ...@@ -13,9 +13,9 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/distributed/table/common_sparse_table.h" #include "paddle/fluid/distributed/table/common_sparse_table.h"
#include <sstream> #include <sstream>
#include "boost/lexical_cast.hpp"
#include "glog/logging.h" #include "glog/logging.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
...@@ -25,82 +25,12 @@ class ValueBlock; ...@@ -25,82 +25,12 @@ class ValueBlock;
} // namespace distributed } // namespace distributed
} // namespace paddle } // namespace paddle
#define PSERVER_SAVE_SUFFIX "_txt"
namespace paddle { namespace paddle {
namespace distributed { namespace distributed {
enum SaveMode { all, base, delta }; void CommonSparseTable::ProcessALine(const std::vector<std::string>& columns,
const Meta& meta, const int64_t id,
struct Meta { std::vector<std::vector<float>>* values) {
std::string param;
int shard_id;
std::vector<std::string> names;
std::vector<int> dims;
uint64_t count;
std::unordered_map<std::string, int> dims_map;
explicit Meta(const std::string& metapath) {
std::ifstream file(metapath);
std::string line;
int num_lines = 0;
while (std::getline(file, line)) {
if (StartWith(line, "#")) {
continue;
}
auto pairs = paddle::string::split_string<std::string>(line, "=");
PADDLE_ENFORCE_EQ(
pairs.size(), 2,
paddle::platform::errors::InvalidArgument(
"info in %s except k=v, but got %s", metapath, line));
if (pairs[0] == "param") {
param = pairs[1];
}
if (pairs[0] == "shard_id") {
shard_id = std::stoi(pairs[1]);
}
if (pairs[0] == "row_names") {
names = paddle::string::split_string<std::string>(pairs[1], ",");
}
if (pairs[0] == "row_dims") {
auto dims_strs =
paddle::string::split_string<std::string>(pairs[1], ",");
for (auto& str : dims_strs) {
dims.push_back(std::stoi(str));
}
}
if (pairs[0] == "count") {
count = std::stoull(pairs[1]);
}
}
for (int x = 0; x < names.size(); ++x) {
dims_map[names[x]] = dims[x];
}
}
Meta(std::string param, int shard_id, std::vector<std::string> row_names,
std::vector<int> dims, uint64_t count) {
this->param = param;
this->shard_id = shard_id;
this->names = row_names;
this->dims = dims;
this->count = count;
}
std::string ToString() {
std::stringstream ss;
ss << "param=" << param << "\n";
ss << "shard_id=" << shard_id << "\n";
ss << "row_names=" << paddle::string::join_strings(names, ',') << "\n";
ss << "row_dims=" << paddle::string::join_strings(dims, ',') << "\n";
ss << "count=" << count << "\n";
return ss.str();
}
};
void ProcessALine(const std::vector<std::string>& columns, const Meta& meta,
std::vector<std::vector<float>>* values) {
auto colunmn_size = columns.size(); auto colunmn_size = columns.size();
auto load_values = auto load_values =
paddle::string::split_string<std::string>(columns[colunmn_size - 1], ","); paddle::string::split_string<std::string>(columns[colunmn_size - 1], ",");
...@@ -116,49 +46,83 @@ void ProcessALine(const std::vector<std::string>& columns, const Meta& meta, ...@@ -116,49 +46,83 @@ void ProcessALine(const std::vector<std::string>& columns, const Meta& meta,
"The data format in txt does not meet the field " "The data format in txt does not meet the field "
"requirements defined in meta")); "requirements defined in meta"));
std::transform(start, end, std::back_inserter(val), std::transform(start, end, std::back_inserter(val), [id](std::string va) {
[](std::string va) { return std::stof(va); }); float v = 0.0;
try {
v = lexical_cast<float>(va);
} catch (boost::bad_lexical_cast& e) {
VLOG(0) << "id: " << id << " get unexpected value: " << va
<< " and be reset to: 0.0";
}
return v;
});
values->push_back(val); values->push_back(val);
offset += meta.dims[x]; offset += meta.dims[x];
} }
} }
int64_t SaveToText(std::ostream* os, std::shared_ptr<ValueBlock> block, void CommonSparseTable::SaveMetaToText(std::ostream* os,
const int mode) { const CommonAccessorParameter& common,
int64_t not_save_num = 0; const size_t shard_idx,
for (auto& value : block->values_) { const int64_t total) {
if (mode == SaveMode::delta && !value.second.need_save_) { // save meta
not_save_num++; std::stringstream stream;
continue; stream << "param=" << common.table_name() << "\n";
} stream << "shard_id=" << shard_idx << "\n";
stream << "row_names=" << paddle::string::join_strings(common.params(), ',')
<< "\n";
stream << "row_dims=" << paddle::string::join_strings(common.dims(), ',')
<< "\n";
stream << "count=" << total << "\n";
os->write(stream.str().c_str(), sizeof(char) * stream.str().size());
}
auto* vs = value.second.data_; int64_t CommonSparseTable::SaveValueToText(std::ostream* os,
std::stringstream ss; std::shared_ptr<ValueBlock> block,
auto id = value.first; std::shared_ptr<::ThreadPool> pool,
ss << id << "\t" << value.second.count_ << "\t" << value.second.unseen_days_ const int mode, int shard_id) {
<< "\t" << value.second.is_entry_ << "\t"; int64_t save_num = 0;
for (auto& table : block->values_) {
for (auto& value : table) {
if (mode == SaveMode::delta && !value.second->need_save_) {
continue;
}
for (int i = 0; i < block->value_length_; i++) { ++save_num;
ss << vs[i];
ss << ","; std::stringstream ss;
} auto* vs = value.second->data_.data();
ss << "\n"; auto id = value.first;
os->write(ss.str().c_str(), sizeof(char) * ss.str().size()); ss << id << "\t" << value.second->count_ << "\t"
<< value.second->unseen_days_ << "\t" << value.second->is_entry_
<< "\t";
for (int i = 0; i < block->value_length_ - 1; i++) {
ss << std::to_string(vs[i]) << ",";
}
if (mode == SaveMode::base || mode == SaveMode::delta) { ss << std::to_string(vs[block->value_length_ - 1]);
value.second.need_save_ = false; ss << "\n";
os->write(ss.str().c_str(), sizeof(char) * ss.str().size());
if (mode == SaveMode::base || mode == SaveMode::delta) {
value.second->need_save_ = false;
}
} }
} }
return block->values_.size() - not_save_num; return save_num;
} }
int64_t LoadFromText(const std::string& valuepath, const std::string& metapath, int64_t CommonSparseTable::LoadFromText(
const int pserver_id, const int pserver_num, const std::string& valuepath, const std::string& metapath,
const int local_shard_num, const int pserver_id, const int pserver_num, const int local_shard_num,
std::vector<std::shared_ptr<ValueBlock>>* blocks) { std::vector<std::shared_ptr<ValueBlock>>* blocks) {
Meta meta = Meta(metapath); Meta meta = Meta(metapath);
int num_lines = 0; int num_lines = 0;
...@@ -167,7 +131,7 @@ int64_t LoadFromText(const std::string& valuepath, const std::string& metapath, ...@@ -167,7 +131,7 @@ int64_t LoadFromText(const std::string& valuepath, const std::string& metapath,
while (std::getline(file, line)) { while (std::getline(file, line)) {
auto values = paddle::string::split_string<std::string>(line, "\t"); auto values = paddle::string::split_string<std::string>(line, "\t");
auto id = std::stoull(values[0]); auto id = lexical_cast<uint64_t>(values[0]);
if (id % pserver_num != pserver_id) { if (id % pserver_num != pserver_id) {
VLOG(3) << "will not load " << values[0] << " from " << valuepath VLOG(3) << "will not load " << values[0] << " from " << valuepath
...@@ -179,15 +143,17 @@ int64_t LoadFromText(const std::string& valuepath, const std::string& metapath, ...@@ -179,15 +143,17 @@ int64_t LoadFromText(const std::string& valuepath, const std::string& metapath,
auto block = blocks->at(shard_id); auto block = blocks->at(shard_id);
std::vector<std::vector<float>> kvalues; std::vector<std::vector<float>> kvalues;
ProcessALine(values, meta, &kvalues); ProcessALine(values, meta, id, &kvalues);
block->Init(id, false); block->Init(id, false);
auto value_instant = block->GetValue(id); VALUE* value_instant = block->GetValue(id);
if (values.size() == 5) { if (values.size() == 5) {
value_instant->count_ = std::stoi(values[1]); value_instant->count_ = lexical_cast<int>(values[1]);
value_instant->unseen_days_ = std::stoi(values[2]); value_instant->unseen_days_ = lexical_cast<int>(values[2]);
value_instant->is_entry_ = static_cast<bool>(std::stoi(values[3])); value_instant->is_entry_ =
static_cast<bool>(lexical_cast<int>(values[3]));
} }
std::vector<float*> block_values = block->Get(id, meta.names, meta.dims); std::vector<float*> block_values = block->Get(id, meta.names, meta.dims);
...@@ -314,16 +280,24 @@ int32_t CommonSparseTable::set_global_lr(float* lr) { ...@@ -314,16 +280,24 @@ int32_t CommonSparseTable::set_global_lr(float* lr) {
int32_t CommonSparseTable::load(const std::string& path, int32_t CommonSparseTable::load(const std::string& path,
const std::string& param) { const std::string& param) {
auto begin = GetCurrentUS();
rwlock_->WRLock(); rwlock_->WRLock();
VLOG(3) << "sparse table load with " << path << " with meta " << param;
LoadFromText(path, param, _shard_idx, _shard_num, task_pool_size_, LoadFromText(path, param, _shard_idx, _shard_num, task_pool_size_,
&shard_values_); &shard_values_);
rwlock_->UNLock(); rwlock_->UNLock();
auto end = GetCurrentUS();
auto varname = _config.common().table_name();
VLOG(0) << "load " << varname << " with value: " << path
<< " , meta: " << param
<< " using: " << std::to_string((end - begin) / 1e+6) << " seconds";
return 0; return 0;
} }
int32_t CommonSparseTable::save(const std::string& dirname, int32_t CommonSparseTable::save(const std::string& dirname,
const std::string& param) { const std::string& param) {
auto begin = GetCurrentUS();
rwlock_->WRLock(); rwlock_->WRLock();
int mode = std::stoi(param); int mode = std::stoi(param);
VLOG(3) << "sparse table save: " << dirname << " mode: " << mode; VLOG(3) << "sparse table save: " << dirname << " mode: " << mode;
...@@ -336,36 +310,34 @@ int32_t CommonSparseTable::save(const std::string& dirname, ...@@ -336,36 +310,34 @@ int32_t CommonSparseTable::save(const std::string& dirname,
VLOG(3) << "save " << varname << " in dir: " << var_store << " begin"; VLOG(3) << "save " << varname << " in dir: " << var_store << " begin";
std::vector<std::string> params(_config.common().params().begin(), std::vector<std::string> params(_config.common().params().begin(),
_config.common().params().end()); _config.common().params().end());
std::string shard_var_pre = std::string shard_var_pre =
string::Sprintf("%s.block%d", varname, _shard_idx); string::Sprintf("%s.block%d", varname, _shard_idx);
std::string value_ = string::Sprintf("%s/%s.txt", var_store, shard_var_pre); std::string value_ = string::Sprintf("%s/%s.txt", var_store, shard_var_pre);
std::unique_ptr<std::ofstream> value_out(new std::ofstream(value_)); std::unique_ptr<std::ofstream> vs(new std::ofstream(value_));
int64_t total_ins = 0; int64_t total_ins = 0;
for (int shard_id = 0; shard_id < task_pool_size_; ++shard_id) { for (int shard_id = 0; shard_id < task_pool_size_; ++shard_id) {
// save values // save values
total_ins += SaveToText(value_out.get(), shard_values_[shard_id], mode); auto shard_save_num =
SaveValueToText(vs.get(), shard_values_[shard_id],
_shards_task_pool[shard_id], mode, shard_id);
total_ins += shard_save_num;
} }
value_out->close(); vs->close();
// save meta
std::stringstream stream;
stream << "param=" << _config.common().table_name() << "\n";
stream << "shard_id=" << _shard_idx << "\n";
stream << "row_names="
<< paddle::string::join_strings(_config.common().params(), ',')
<< "\n";
stream << "row_dims="
<< paddle::string::join_strings(_config.common().dims(), ',') << "\n";
stream << "count=" << total_ins << "\n";
std::string meta_ = string::Sprintf("%s/%s.meta", var_store, shard_var_pre); std::string meta_ = string::Sprintf("%s/%s.meta", var_store, shard_var_pre);
std::unique_ptr<std::ofstream> meta_out(new std::ofstream(meta_)); std::unique_ptr<std::ofstream> ms(new std::ofstream(meta_));
meta_out->write(stream.str().c_str(), sizeof(char) * stream.str().size()); SaveMetaToText(ms.get(), _config.common(), _shard_idx, total_ins);
meta_out->close(); ms->close();
VLOG(3) << "save " << varname << " in dir: " << var_store << " done";
auto end = GetCurrentUS();
rwlock_->UNLock(); rwlock_->UNLock();
VLOG(0) << "save " << varname << " with path: " << value_
<< " using: " << std::to_string((end - begin) / 1e+6) << " seconds";
return 0; return 0;
} }
...@@ -373,16 +345,16 @@ std::pair<int64_t, int64_t> CommonSparseTable::print_table_stat() { ...@@ -373,16 +345,16 @@ std::pair<int64_t, int64_t> CommonSparseTable::print_table_stat() {
int64_t feasign_size = 0; int64_t feasign_size = 0;
int64_t mf_size = 0; int64_t mf_size = 0;
for (auto& value : shard_values_) { for (auto& shard : shard_values_) {
feasign_size += value->values_.size(); for (auto& table : shard->values_) {
feasign_size += table.size();
}
} }
return {feasign_size, mf_size}; return {feasign_size, mf_size};
} }
int32_t CommonSparseTable::pour() { int32_t CommonSparseTable::pour() {
rwlock_->RDLock();
std::vector<float> values; std::vector<float> values;
std::vector<uint64_t> keys; std::vector<uint64_t> keys;
...@@ -399,14 +371,11 @@ int32_t CommonSparseTable::pour() { ...@@ -399,14 +371,11 @@ int32_t CommonSparseTable::pour() {
_push_sparse(keys.data(), values.data(), pull_reservoir_.size()); _push_sparse(keys.data(), values.data(), pull_reservoir_.size());
pull_reservoir_.clear(); pull_reservoir_.clear();
rwlock_->UNLock();
return 0; return 0;
} }
int32_t CommonSparseTable::pull_sparse(float* pull_values, int32_t CommonSparseTable::pull_sparse(float* pull_values,
const PullSparseValue& pull_value) { const PullSparseValue& pull_value) {
rwlock_->RDLock();
auto shard_num = task_pool_size_; auto shard_num = task_pool_size_;
std::vector<std::future<int>> tasks(shard_num); std::vector<std::future<int>> tasks(shard_num);
...@@ -442,7 +411,6 @@ int32_t CommonSparseTable::pull_sparse(float* pull_values, ...@@ -442,7 +411,6 @@ int32_t CommonSparseTable::pull_sparse(float* pull_values,
for (size_t shard_id = 0; shard_id < tasks.size(); ++shard_id) { for (size_t shard_id = 0; shard_id < tasks.size(); ++shard_id) {
tasks[shard_id].wait(); tasks[shard_id].wait();
} }
rwlock_->UNLock();
return 0; return 0;
} }
...@@ -470,7 +438,7 @@ int32_t CommonSparseTable::pull_sparse_ptr(char** pull_values, ...@@ -470,7 +438,7 @@ int32_t CommonSparseTable::pull_sparse_ptr(char** pull_values,
auto* value = block->InitGet(id); auto* value = block->InitGet(id);
// std::copy_n(value + param_offset_, param_dim_, // std::copy_n(value + param_offset_, param_dim_,
// pull_values + param_dim_ * offset); // pull_values + param_dim_ * offset);
pull_values[offset] = (char*)value; pull_values[offset] = reinterpret_cast<char*>(value);
} }
return 0; return 0;
...@@ -485,7 +453,6 @@ int32_t CommonSparseTable::pull_sparse_ptr(char** pull_values, ...@@ -485,7 +453,6 @@ int32_t CommonSparseTable::pull_sparse_ptr(char** pull_values,
int32_t CommonSparseTable::_push_sparse(const uint64_t* keys, int32_t CommonSparseTable::_push_sparse(const uint64_t* keys,
const float* values, size_t num) { const float* values, size_t num) {
rwlock_->RDLock();
std::vector<std::vector<uint64_t>> offset_bucket; std::vector<std::vector<uint64_t>> offset_bucket;
offset_bucket.resize(task_pool_size_); offset_bucket.resize(task_pool_size_);
...@@ -509,7 +476,6 @@ int32_t CommonSparseTable::_push_sparse(const uint64_t* keys, ...@@ -509,7 +476,6 @@ int32_t CommonSparseTable::_push_sparse(const uint64_t* keys,
for (size_t shard_id = 0; shard_id < tasks.size(); ++shard_id) { for (size_t shard_id = 0; shard_id < tasks.size(); ++shard_id) {
tasks[shard_id].wait(); tasks[shard_id].wait();
} }
rwlock_->UNLock();
return 0; return 0;
} }
...@@ -547,7 +513,6 @@ int32_t CommonSparseTable::push_sparse(const uint64_t* keys, ...@@ -547,7 +513,6 @@ int32_t CommonSparseTable::push_sparse(const uint64_t* keys,
int32_t CommonSparseTable::_push_sparse(const uint64_t* keys, int32_t CommonSparseTable::_push_sparse(const uint64_t* keys,
const float** values, size_t num) { const float** values, size_t num) {
rwlock_->RDLock();
std::vector<std::vector<uint64_t>> offset_bucket; std::vector<std::vector<uint64_t>> offset_bucket;
offset_bucket.resize(task_pool_size_); offset_bucket.resize(task_pool_size_);
...@@ -574,14 +539,11 @@ int32_t CommonSparseTable::_push_sparse(const uint64_t* keys, ...@@ -574,14 +539,11 @@ int32_t CommonSparseTable::_push_sparse(const uint64_t* keys,
for (size_t shard_id = 0; shard_id < tasks.size(); ++shard_id) { for (size_t shard_id = 0; shard_id < tasks.size(); ++shard_id) {
tasks[shard_id].wait(); tasks[shard_id].wait();
} }
rwlock_->UNLock();
return 0; return 0;
} }
int32_t CommonSparseTable::push_sparse_param(const uint64_t* keys, int32_t CommonSparseTable::push_sparse_param(const uint64_t* keys,
const float* values, size_t num) { const float* values, size_t num) {
rwlock_->RDLock();
std::vector<std::vector<uint64_t>> offset_bucket; std::vector<std::vector<uint64_t>> offset_bucket;
offset_bucket.resize(task_pool_size_); offset_bucket.resize(task_pool_size_);
...@@ -613,14 +575,12 @@ int32_t CommonSparseTable::push_sparse_param(const uint64_t* keys, ...@@ -613,14 +575,12 @@ int32_t CommonSparseTable::push_sparse_param(const uint64_t* keys,
for (size_t shard_id = 0; shard_id < tasks.size(); ++shard_id) { for (size_t shard_id = 0; shard_id < tasks.size(); ++shard_id) {
tasks[shard_id].wait(); tasks[shard_id].wait();
} }
rwlock_->UNLock();
return 0; return 0;
} }
int32_t CommonSparseTable::flush() { return 0; } int32_t CommonSparseTable::flush() { return 0; }
int32_t CommonSparseTable::shrink(const std::string& param) { int32_t CommonSparseTable::shrink(const std::string& param) {
rwlock_->WRLock();
int threshold = std::stoi(param); int threshold = std::stoi(param);
VLOG(3) << "sparse table shrink: " << threshold; VLOG(3) << "sparse table shrink: " << threshold;
...@@ -629,7 +589,6 @@ int32_t CommonSparseTable::shrink(const std::string& param) { ...@@ -629,7 +589,6 @@ int32_t CommonSparseTable::shrink(const std::string& param) {
VLOG(4) << shard_id << " " << task_pool_size_ << " begin shrink"; VLOG(4) << shard_id << " " << task_pool_size_ << " begin shrink";
shard_values_[shard_id]->Shrink(threshold); shard_values_[shard_id]->Shrink(threshold);
} }
rwlock_->UNLock();
return 0; return 0;
} }
......
...@@ -32,11 +32,83 @@ ...@@ -32,11 +32,83 @@
#include "paddle/fluid/framework/rw_lock.h" #include "paddle/fluid/framework/rw_lock.h"
#include "paddle/fluid/string/string_helper.h" #include "paddle/fluid/string/string_helper.h"
#define PSERVER_SAVE_SUFFIX ".shard"
using boost::lexical_cast;
namespace paddle { namespace paddle {
namespace distributed { namespace distributed {
class SparseOptimizer; class SparseOptimizer;
enum SaveMode { all, base, delta };
struct Meta {
std::string param;
int shard_id;
std::vector<std::string> names;
std::vector<int> dims;
uint64_t count;
std::unordered_map<std::string, int> dims_map;
explicit Meta(const std::string& metapath) {
std::ifstream file(metapath);
std::string line;
int num_lines = 0;
while (std::getline(file, line)) {
if (StartWith(line, "#")) {
continue;
}
auto pairs = paddle::string::split_string<std::string>(line, "=");
PADDLE_ENFORCE_EQ(
pairs.size(), 2,
paddle::platform::errors::InvalidArgument(
"info in %s except k=v, but got %s", metapath, line));
if (pairs[0] == "param") {
param = pairs[1];
}
if (pairs[0] == "shard_id") {
shard_id = std::stoi(pairs[1]);
}
if (pairs[0] == "row_names") {
names = paddle::string::split_string<std::string>(pairs[1], ",");
}
if (pairs[0] == "row_dims") {
auto dims_strs =
paddle::string::split_string<std::string>(pairs[1], ",");
for (auto& str : dims_strs) {
dims.push_back(std::stoi(str));
}
}
if (pairs[0] == "count") {
count = std::stoull(pairs[1]);
}
}
for (int x = 0; x < names.size(); ++x) {
dims_map[names[x]] = dims[x];
}
}
Meta(std::string param, int shard_id, std::vector<std::string> row_names,
std::vector<int> dims, uint64_t count) {
this->param = param;
this->shard_id = shard_id;
this->names = row_names;
this->dims = dims;
this->count = count;
}
std::string ToString() {
std::stringstream ss;
ss << "param=" << param << "\n";
ss << "shard_id=" << shard_id << "\n";
ss << "row_names=" << paddle::string::join_strings(names, ',') << "\n";
ss << "row_dims=" << paddle::string::join_strings(dims, ',') << "\n";
ss << "count=" << count << "\n";
return ss.str();
}
};
class CommonSparseTable : public SparseTable { class CommonSparseTable : public SparseTable {
public: public:
CommonSparseTable() { rwlock_.reset(new framework::RWLock); } CommonSparseTable() { rwlock_.reset(new framework::RWLock); }
...@@ -56,9 +128,25 @@ class CommonSparseTable : public SparseTable { ...@@ -56,9 +128,25 @@ class CommonSparseTable : public SparseTable {
virtual int32_t initialize_optimizer(); virtual int32_t initialize_optimizer();
virtual int32_t initialize_recorder(); virtual int32_t initialize_recorder();
int32_t load(const std::string& path, const std::string& param); virtual int32_t load(const std::string& path, const std::string& param);
virtual int32_t save(const std::string& path, const std::string& param);
void SaveMetaToText(std::ostream* os, const CommonAccessorParameter& common,
const size_t shard_idx, const int64_t total);
int32_t save(const std::string& path, const std::string& param); int64_t SaveValueToText(std::ostream* os, std::shared_ptr<ValueBlock> block,
std::shared_ptr<::ThreadPool> pool, const int mode,
int shard_id);
virtual void ProcessALine(const std::vector<std::string>& columns,
const Meta& meta, const int64_t id,
std::vector<std::vector<float>>* values);
virtual int64_t LoadFromText(
const std::string& valuepath, const std::string& metapath,
const int pserver_id, const int pserver_num, const int local_shard_num,
std::vector<std::shared_ptr<ValueBlock>>* blocks);
virtual std::pair<int64_t, int64_t> print_table_stat(); virtual std::pair<int64_t, int64_t> print_table_stat();
virtual int32_t pull_sparse(float* values, const PullSparseValue& pull_value); virtual int32_t pull_sparse(float* values, const PullSparseValue& pull_value);
...@@ -89,7 +177,7 @@ class CommonSparseTable : public SparseTable { ...@@ -89,7 +177,7 @@ class CommonSparseTable : public SparseTable {
virtual int32_t _push_sparse(const uint64_t* keys, const float** values, virtual int32_t _push_sparse(const uint64_t* keys, const float** values,
size_t num); size_t num);
private: protected:
const int task_pool_size_ = 11; const int task_pool_size_ = 11;
std::vector<std::shared_ptr<::ThreadPool>> _shards_task_pool; std::vector<std::shared_ptr<::ThreadPool>> _shards_task_pool;
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include <vector> #include <vector>
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "butil/object_pool.h"
#include "paddle/fluid/distributed/common/utils.h" #include "paddle/fluid/distributed/common/utils.h"
#include "paddle/fluid/distributed/table/depends/initializers.h" #include "paddle/fluid/distributed/table/depends/initializers.h"
#include "paddle/fluid/distributed/thirdparty/round_robin.h" #include "paddle/fluid/distributed/thirdparty/round_robin.h"
...@@ -48,6 +49,10 @@ namespace distributed { ...@@ -48,6 +49,10 @@ namespace distributed {
enum Mode { training, infer }; enum Mode { training, infer };
static const int SPARSE_SHARD_BUCKET_NUM_BITS = 6;
static const size_t SPARSE_SHARD_BUCKET_NUM = (size_t)1
<< SPARSE_SHARD_BUCKET_NUM_BITS;
struct VALUE { struct VALUE {
explicit VALUE(size_t length) explicit VALUE(size_t length)
: length_(length), : length_(length),
...@@ -55,46 +60,16 @@ struct VALUE { ...@@ -55,46 +60,16 @@ struct VALUE {
unseen_days_(0), unseen_days_(0),
need_save_(false), need_save_(false),
is_entry_(false) { is_entry_(false) {
data_ = new float[length]; data_.resize(length);
memset(data_, 0, sizeof(float) * length); memset(data_.data(), 0, sizeof(float) * length);
}
VALUE(const VALUE &value) {
length_ = value.length_;
count_ = value.count_;
unseen_days_ = value.unseen_days_;
need_save_ = value.need_save_;
is_entry_ = value.is_entry_;
data_ = new float[length_];
memcpy(data_, value.data_, sizeof(float) * length_);
}
VALUE &operator=(const VALUE &value) {
if (this != &value) {
delete[] data_;
length_ = value.length_;
count_ = value.count_;
unseen_days_ = value.unseen_days_;
need_save_ = value.need_save_;
is_entry_ = value.is_entry_;
data_ = new float[length_];
memcpy(data_, value.data_, sizeof(float) * length_);
}
return *this;
}
~VALUE() {
delete[] data_;
data_ = nullptr;
} }
size_t length_; size_t length_;
std::vector<float> data_;
int count_; int count_;
int unseen_days_; // use to check knock-out int unseen_days_; // use to check knock-out
bool need_save_; // whether need to save bool need_save_; // whether need to save
bool is_entry_; // whether knock-in bool is_entry_; // whether knock-in
float *data_;
}; };
inline bool count_entry(VALUE *value, int threshold) { inline bool count_entry(VALUE *value, int threshold) {
...@@ -108,6 +83,7 @@ inline bool probility_entry(VALUE *value, float threshold) { ...@@ -108,6 +83,7 @@ inline bool probility_entry(VALUE *value, float threshold) {
class ValueBlock { class ValueBlock {
public: public:
typedef typename robin_hood::unordered_map<uint64_t, VALUE *> map_type;
explicit ValueBlock(const std::vector<std::string> &value_names, explicit ValueBlock(const std::vector<std::string> &value_names,
const std::vector<int> &value_dims, const std::vector<int> &value_dims,
const std::vector<int> &value_offsets, const std::vector<int> &value_offsets,
...@@ -176,12 +152,12 @@ class ValueBlock { ...@@ -176,12 +152,12 @@ class ValueBlock {
const std::vector<int> &value_dims) { const std::vector<int> &value_dims) {
auto pts = std::vector<float *>(); auto pts = std::vector<float *>();
pts.reserve(value_names.size()); pts.reserve(value_names.size());
auto &values = values_.at(id); auto values = GetValue(id);
for (int i = 0; i < static_cast<int>(value_names.size()); i++) { for (int i = 0; i < static_cast<int>(value_names.size()); i++) {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
value_dims[i], value_dims_[i], value_dims[i], value_dims_[i],
platform::errors::InvalidArgument("value dims is not match")); platform::errors::InvalidArgument("value dims is not match"));
pts.push_back(values.data_ + pts.push_back(values->data_.data() +
value_offsets_.at(value_idx_.at(value_names[i]))); value_offsets_.at(value_idx_.at(value_names[i])));
} }
return pts; return pts;
...@@ -190,33 +166,45 @@ class ValueBlock { ...@@ -190,33 +166,45 @@ class ValueBlock {
// pull // pull
float *Init(const uint64_t &id, const bool with_update = true, float *Init(const uint64_t &id, const bool with_update = true,
const int counter = 1) { const int counter = 1) {
if (!Has(id)) { size_t hash = _hasher(id);
values_.emplace(std::make_pair(id, VALUE(value_length_))); size_t bucket = compute_bucket(hash);
}
auto &value = values_.at(id); auto &table = values_[bucket];
auto res = table.find(id);
if (with_update) { VALUE *value = nullptr;
AttrUpdate(&value, counter); if (res == table.end()) {
value = butil::get_object<VALUE>(value_length_);
table[id] = value;
} else {
value = res->second;
} }
return value.data_; if (with_update) {
AttrUpdate(value, counter);
}
return value->data_.data();
} }
VALUE *InitGet(const uint64_t &id, const bool with_update = true, VALUE *InitGet(const uint64_t &id, const bool with_update = true,
const int counter = 1) { const int counter = 1) {
if (!Has(id)) { size_t hash = _hasher(id);
values_.emplace(std::make_pair(id, VALUE(value_length_))); size_t bucket = compute_bucket(hash);
}
auto &value = values_.at(id); auto &table = values_[bucket];
auto res = table.find(id);
if (with_update) { VALUE *value = nullptr;
AttrUpdate(&value, counter); if (res == table.end()) {
value = butil::get_object<VALUE>(value_length_);
// value = _alloc.acquire(value_length_);
table[id] = value;
} else {
value = (VALUE *)(void *)(res->second);
} }
return value;
return &value;
} }
void AttrUpdate(VALUE *value, const int counter) { void AttrUpdate(VALUE *value, const int counter) {
...@@ -229,7 +217,7 @@ class ValueBlock { ...@@ -229,7 +217,7 @@ class ValueBlock {
if (value->is_entry_) { if (value->is_entry_) {
// initialize // initialize
for (size_t x = 0; x < value_names_.size(); ++x) { for (size_t x = 0; x < value_names_.size(); ++x) {
initializers_[x]->GetValue(value->data_ + value_offsets_[x], initializers_[x]->GetValue(value->data_.data() + value_offsets_[x],
value_dims_[x]); value_dims_[x]);
} }
value->need_save_ = true; value->need_save_ = true;
...@@ -243,42 +231,102 @@ class ValueBlock { ...@@ -243,42 +231,102 @@ class ValueBlock {
// dont jude if (has(id)) // dont jude if (has(id))
float *Get(const uint64_t &id) { float *Get(const uint64_t &id) {
auto &value = values_.at(id); size_t hash = _hasher(id);
return value.data_; size_t bucket = compute_bucket(hash);
auto &table = values_[bucket];
// auto &value = table.at(id);
// return value->data_.data();
auto res = table.find(id);
VALUE *value = res->second;
return value->data_.data();
} }
// for load, to reset count, unseen_days // for load, to reset count, unseen_days
VALUE *GetValue(const uint64_t &id) { return &values_.at(id); } VALUE *GetValue(const uint64_t &id) {
size_t hash = _hasher(id);
size_t bucket = compute_bucket(hash);
auto &table = values_[bucket];
auto res = table.find(id);
return res->second;
}
bool GetEntry(const uint64_t &id) { bool GetEntry(const uint64_t &id) {
auto &value = values_.at(id); auto value = GetValue(id);
return value.is_entry_; return value->is_entry_;
} }
void SetEntry(const uint64_t &id, const bool state) { void SetEntry(const uint64_t &id, const bool state) {
auto &value = values_.at(id); auto value = GetValue(id);
value.is_entry_ = state; value->is_entry_ = state;
}
void erase(uint64_t feasign) {
size_t hash = _hasher(feasign);
size_t bucket = compute_bucket(hash);
auto &table = values_[bucket];
auto iter = table.find(feasign);
if (iter != table.end()) {
butil::return_object(iter->second);
iter = table.erase(iter);
}
} }
void Shrink(const int threshold) { void Shrink(const int threshold) {
for (auto iter = values_.begin(); iter != values_.end();) { for (auto &table : values_) {
auto &value = iter->second; for (auto iter = table.begin(); iter != table.end();) {
value.unseen_days_++; // VALUE* value = (VALUE*)(void*)(iter->second);
if (value.unseen_days_ >= threshold) { VALUE *value = iter->second;
iter = values_.erase(iter); value->unseen_days_++;
} else { if (value->unseen_days_ >= threshold) {
++iter; butil::return_object(iter->second);
//_alloc.release(iter->second);
//_alloc.release(value);
iter = table.erase(iter);
} else {
++iter;
}
} }
} }
return; return;
} }
float GetThreshold() { return threshold_; } float GetThreshold() { return threshold_; }
size_t compute_bucket(size_t hash) {
if (SPARSE_SHARD_BUCKET_NUM == 1) {
return 0;
} else {
return hash >> (sizeof(size_t) * 8 - SPARSE_SHARD_BUCKET_NUM_BITS);
}
}
map_type::iterator end() {
return values_[SPARSE_SHARD_BUCKET_NUM - 1].end();
}
map_type::iterator Find(uint64_t id) {
size_t hash = _hasher(id);
size_t bucket = compute_bucket(hash);
auto &table = values_[bucket];
auto got = table.find(id);
if (got == table.end()) {
return end();
} else {
return got;
}
}
private: private:
bool Has(const uint64_t id) { bool Has(const uint64_t id) {
auto got = values_.find(id); size_t hash = _hasher(id);
if (got == values_.end()) { size_t bucket = compute_bucket(hash);
auto &table = values_[bucket];
auto got = table.find(id);
if (got == table.end()) {
return false; return false;
} else { } else {
return true; return true;
...@@ -286,8 +334,9 @@ class ValueBlock { ...@@ -286,8 +334,9 @@ class ValueBlock {
} }
public: public:
robin_hood::unordered_map<uint64_t, VALUE> values_; map_type values_[SPARSE_SHARD_BUCKET_NUM];
size_t value_length_ = 0; size_t value_length_ = 0;
std::hash<uint64_t> _hasher;
private: private:
const std::vector<std::string> &value_names_; const std::vector<std::string> &value_names_;
...@@ -302,4 +351,3 @@ class ValueBlock { ...@@ -302,4 +351,3 @@ class ValueBlock {
} // namespace distributed } // namespace distributed
} // namespace paddle } // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef PADDLE_WITH_HETERPS
#include <glog/logging.h>
#include <rocksdb/db.h>
#include <rocksdb/filter_policy.h>
#include <rocksdb/options.h>
#include <rocksdb/slice.h>
#include <rocksdb/table.h>
#include <rocksdb/write_batch.h>
#include <iostream>
#include <string>
namespace paddle {
namespace distributed {
class RocksDBHandler {
public:
RocksDBHandler() {}
~RocksDBHandler() {}
static RocksDBHandler* GetInstance() {
static RocksDBHandler handler;
return &handler;
}
int initialize(const std::string& db_path, const int colnum) {
VLOG(3) << "db path: " << db_path << " colnum: " << colnum;
rocksdb::Options options;
rocksdb::BlockBasedTableOptions bbto;
bbto.block_size = 4 * 1024;
bbto.block_cache = rocksdb::NewLRUCache(64 * 1024 * 1024);
bbto.block_cache_compressed = rocksdb::NewLRUCache(64 * 1024 * 1024);
bbto.cache_index_and_filter_blocks = false;
bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(20, false));
bbto.whole_key_filtering = true;
options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(bbto));
options.keep_log_file_num = 100;
options.max_log_file_size = 50 * 1024 * 1024; // 50MB
options.create_if_missing = true;
options.use_direct_reads = true;
options.max_background_flushes = 5;
options.max_background_compactions = 5;
options.base_background_compactions = 10;
options.write_buffer_size = 256 * 1024 * 1024; // 256MB
options.max_write_buffer_number = 8;
options.max_bytes_for_level_base =
options.max_write_buffer_number * options.write_buffer_size;
options.min_write_buffer_number_to_merge = 1;
options.target_file_size_base = 1024 * 1024 * 1024; // 1024MB
options.memtable_prefix_bloom_size_ratio = 0.02;
options.num_levels = 4;
options.max_open_files = -1;
options.compression = rocksdb::kNoCompression;
options.level0_file_num_compaction_trigger = 8;
options.level0_slowdown_writes_trigger =
1.8 * options.level0_file_num_compaction_trigger;
options.level0_stop_writes_trigger =
3.6 * options.level0_file_num_compaction_trigger;
if (!db_path.empty()) {
std::string rm_cmd = "rm -rf " + db_path;
system(rm_cmd.c_str());
}
rocksdb::Status s = rocksdb::DB::Open(options, db_path, &_db);
assert(s.ok());
_handles.resize(colnum);
for (int i = 0; i < colnum; i++) {
s = _db->CreateColumnFamily(options, "shard_" + std::to_string(i),
&_handles[i]);
assert(s.ok());
}
LOG(INFO) << "DB initialize success, colnum:" << colnum;
return 0;
}
int put(int id, const char* key, int key_len, const char* value,
int value_len) {
rocksdb::WriteOptions options;
options.disableWAL = true;
rocksdb::Status s =
_db->Put(options, _handles[id], rocksdb::Slice(key, key_len),
rocksdb::Slice(value, value_len));
assert(s.ok());
return 0;
}
int put_batch(int id, std::vector<std::pair<char*, int>>& ssd_keys,
std::vector<std::pair<char*, int>>& ssd_values, int n) {
rocksdb::WriteOptions options;
options.disableWAL = true;
rocksdb::WriteBatch batch(n * 128);
for (int i = 0; i < n; i++) {
batch.Put(_handles[id],
rocksdb::Slice(ssd_keys[i].first, ssd_keys[i].second),
rocksdb::Slice(ssd_values[i].first, ssd_values[i].second));
}
rocksdb::Status s = _db->Write(options, &batch);
assert(s.ok());
return 0;
}
int get(int id, const char* key, int key_len, std::string& value) {
rocksdb::Status s = _db->Get(rocksdb::ReadOptions(), _handles[id],
rocksdb::Slice(key, key_len), &value);
if (s.IsNotFound()) {
return 1;
}
assert(s.ok());
return 0;
}
int del_data(int id, const char* key, int key_len) {
rocksdb::WriteOptions options;
options.disableWAL = true;
rocksdb::Status s =
_db->Delete(options, _handles[id], rocksdb::Slice(key, key_len));
assert(s.ok());
return 0;
}
int flush(int id) {
rocksdb::Status s = _db->Flush(rocksdb::FlushOptions(), _handles[id]);
assert(s.ok());
return 0;
}
rocksdb::Iterator* get_iterator(int id) {
return _db->NewIterator(rocksdb::ReadOptions(), _handles[id]);
}
int get_estimate_key_num(uint64_t& num_keys) {
_db->GetAggregatedIntProperty("rocksdb.estimate-num-keys", &num_keys);
return 0;
}
private:
std::vector<rocksdb::ColumnFamilyHandle*> _handles;
rocksdb::DB* _db;
};
}
}
#endif
...@@ -113,5 +113,5 @@ void FeatureNode::recover_from_buffer(char* buffer) { ...@@ -113,5 +113,5 @@ void FeatureNode::recover_from_buffer(char* buffer) {
feature.push_back(std::string(str)); feature.push_back(std::string(str));
} }
} }
} } // namespace distributed
} } // namespace paddle
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#pragma once #pragma once
#include <cstring> #include <cstring>
#include <iostream> #include <iostream>
#include <memory>
#include <sstream> #include <sstream>
#include <vector> #include <vector>
#include "paddle/fluid/distributed/table/graph/graph_weighted_sampler.h" #include "paddle/fluid/distributed/table/graph/graph_weighted_sampler.h"
...@@ -33,7 +34,10 @@ class Node { ...@@ -33,7 +34,10 @@ class Node {
virtual void build_edges(bool is_weighted) {} virtual void build_edges(bool is_weighted) {}
virtual void build_sampler(std::string sample_type) {} virtual void build_sampler(std::string sample_type) {}
virtual void add_edge(uint64_t id, float weight) {} virtual void add_edge(uint64_t id, float weight) {}
virtual std::vector<int> sample_k(int k) { return std::vector<int>(); } virtual std::vector<int> sample_k(
int k, const std::shared_ptr<std::mt19937_64> rng) {
return std::vector<int>();
}
virtual uint64_t get_neighbor_id(int idx) { return 0; } virtual uint64_t get_neighbor_id(int idx) { return 0; }
virtual float get_neighbor_weight(int idx) { return 1.; } virtual float get_neighbor_weight(int idx) { return 1.; }
...@@ -59,7 +63,10 @@ class GraphNode : public Node { ...@@ -59,7 +63,10 @@ class GraphNode : public Node {
virtual void add_edge(uint64_t id, float weight) { virtual void add_edge(uint64_t id, float weight) {
edges->add_edge(id, weight); edges->add_edge(id, weight);
} }
virtual std::vector<int> sample_k(int k) { return sampler->sample_k(k); } virtual std::vector<int> sample_k(
int k, const std::shared_ptr<std::mt19937_64> rng) {
return sampler->sample_k(k, rng);
}
virtual uint64_t get_neighbor_id(int idx) { return edges->get_id(idx); } virtual uint64_t get_neighbor_id(int idx) { return edges->get_id(idx); }
virtual float get_neighbor_weight(int idx) { return edges->get_weight(idx); } virtual float get_neighbor_weight(int idx) { return edges->get_weight(idx); }
...@@ -123,5 +130,5 @@ class FeatureNode : public Node { ...@@ -123,5 +130,5 @@ class FeatureNode : public Node {
protected: protected:
std::vector<std::string> feature; std::vector<std::string> feature;
}; };
} } // namespace distributed
} } // namespace paddle
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
...@@ -21,6 +21,9 @@ ...@@ -21,6 +21,9 @@
#include "paddle/fluid/distributed/table/common_graph_table.h" #include "paddle/fluid/distributed/table/common_graph_table.h"
#include "paddle/fluid/distributed/table/common_sparse_table.h" #include "paddle/fluid/distributed/table/common_sparse_table.h"
#include "paddle/fluid/distributed/table/sparse_geo_table.h" #include "paddle/fluid/distributed/table/sparse_geo_table.h"
#ifdef PADDLE_WITH_HETERPS
#include "paddle/fluid/distributed/table/ssd_sparse_table.h"
#endif
#include "paddle/fluid/distributed/table/tensor_accessor.h" #include "paddle/fluid/distributed/table/tensor_accessor.h"
#include "paddle/fluid/distributed/table/tensor_table.h" #include "paddle/fluid/distributed/table/tensor_table.h"
...@@ -29,6 +32,9 @@ namespace distributed { ...@@ -29,6 +32,9 @@ namespace distributed {
REGISTER_PSCORE_CLASS(Table, GraphTable); REGISTER_PSCORE_CLASS(Table, GraphTable);
REGISTER_PSCORE_CLASS(Table, CommonDenseTable); REGISTER_PSCORE_CLASS(Table, CommonDenseTable);
REGISTER_PSCORE_CLASS(Table, CommonSparseTable); REGISTER_PSCORE_CLASS(Table, CommonSparseTable);
#ifdef PADDLE_WITH_HETERPS
REGISTER_PSCORE_CLASS(Table, SSDSparseTable);
#endif
REGISTER_PSCORE_CLASS(Table, SparseGeoTable); REGISTER_PSCORE_CLASS(Table, SparseGeoTable);
REGISTER_PSCORE_CLASS(Table, BarrierTable); REGISTER_PSCORE_CLASS(Table, BarrierTable);
REGISTER_PSCORE_CLASS(Table, TensorTable); REGISTER_PSCORE_CLASS(Table, TensorTable);
......
set_source_files_properties(table_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(table_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
cc_test(table_test SRCS table_test.cc DEPS common_table table tensor_accessor ps_framework_proto ${COMMON_DEPS}) cc_test(table_test SRCS table_test.cc DEPS common_table table tensor_accessor
ps_framework_proto ${COMMON_DEPS} ${RPC_DEPS})
set_source_files_properties(dense_table_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(dense_table_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
cc_test(dense_table_test SRCS dense_table_test.cc DEPS common_table table tensor_accessor ps_framework_proto ${COMMON_DEPS}) cc_test(dense_table_test SRCS dense_table_test.cc DEPS common_table table
tensor_accessor ps_framework_proto ${COMMON_DEPS} ${RPC_DEPS})
set_source_files_properties(barrier_table_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(barrier_table_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
cc_test(barrier_table_test SRCS barrier_table_test.cc DEPS common_table table tensor_accessor ps_framework_proto ${COMMON_DEPS}) cc_test(barrier_table_test SRCS barrier_table_test.cc DEPS common_table table tensor_accessor ps_framework_proto ${COMMON_DEPS})
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册