diff --git a/Dockerfile b/Dockerfile index fc5069a6c080ed23317695e6822c4c46b5b5c7f9..48c750358cfcb227667c429f19befcaa2f51ebbd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,7 +23,7 @@ ENV HOME /root COPY ./paddle/scripts/docker/root/ /root/ RUN apt-get update && \ - apt-get install -y --allow-downgrades \ + apt-get install -y --allow-downgrades patchelf \ git python-pip python-dev python-opencv openssh-server bison \ libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 \ wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \ diff --git a/cmake/generic.cmake b/cmake/generic.cmake index fd7fc16bff5651f022b484623243048fbd225b5a..eafb11b6f21e226fc68556a78d675dea94080140 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -257,8 +257,8 @@ function(cc_test TARGET_NAME) set(multiValueArgs SRCS DEPS ARGS) cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_executable(${TARGET_NAME} ${cc_test_SRCS}) - target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog) - add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog) + target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog) + add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog) add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME} ${cc_test_ARGS} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) @@ -324,8 +324,8 @@ function(nv_test TARGET_NAME) set(multiValueArgs SRCS DEPS) cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS}) - target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main memory gtest gflags glog) - add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main memory gtest gflags glog) + target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog) + add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog) add_test(${TARGET_NAME} ${TARGET_NAME}) if (nv_test_SERIAL) set_property(TEST ${TARGET_NAME} PROPERTY SERIAL 1) diff --git a/doc/fluid/api/transpiler.rst b/doc/fluid/api/transpiler.rst index 943d39331d26c05764c90cb24f6774997c976bfe..d2ac04f1449c32cb414cea1b76d7469bbe9ccb85 100644 --- a/doc/fluid/api/transpiler.rst +++ b/doc/fluid/api/transpiler.rst @@ -14,6 +14,15 @@ DistributeTranspiler :members: :noindex: +.. _api_fluid_transpiler_InferenceTranspiler: + +InferenceTranspiler +------------------- + +.. autoclass:: paddle.fluid.transpiler.InferenceTranspiler + :members: + :noindex: + .. _api_fluid_transpiler_memory_optimize: memory_optimize diff --git a/doc/fluid/design/dist_train/dist_train_nccl2.md b/doc/fluid/design/dist_train/dist_train_nccl2.md new file mode 100644 index 0000000000000000000000000000000000000000..aa7455ec5de0d46d7c2b0cef3b7ebf4754af3cb1 --- /dev/null +++ b/doc/fluid/design/dist_train/dist_train_nccl2.md @@ -0,0 +1,35 @@ +# Distributed Training with NCCL2 + +We design a pattern that can enable training with `ParallelExecutor` and +using [NCCL2](https://developer.nvidia.com/nccl) as it's collective +communication library. + +In `ParallelExecutor` we can use `AllReduce` or `Reduce` and `Broadcast` +to do multi GPU training. And if we initialize NCCL2 communicators as +ranks in a distributed environment, we can simply run the `ParallelExecutor` +as a distributed program! The only thing that may be different than in +the single node version is that we need to broadcast the NCCL unique ID +to all the nodes, and initialize communicators using that ID, so NCCL2 +will know each other as ranks. + +To achieve this feature, we introduce a new operator: `gen_nccl_id` op, +so we are ***not*** "bind to" running NCCL2 with MPI, we can run it in +what ever platform you like. + +It have two running modes: + +1. Generate and broadcast mode, which should be used on trainer 0; +1. Listen and fetch mode, which should be used on trainers other than 0. + +In both two modes, this op can save the NCCL ID into current scope as a +persistable variable, Then we can insert this op at the end of +"startup program" of fluid, so that all workers can get the same ID to +initialize NCCL communicator objects. + + + +The above figure indicates the general process when training with NCCL2 +distributed. Each trainer have the number of communicators equal to the +number of GPUs, but the ranks should match the global ranks number: here +we have total 8 GPUs, so `nranks==8`, for each trainer, the ranks should +be from 0 ~ 3 on trainer 0 and 4 ~ 7 on trainer 1. diff --git a/doc/fluid/design/dist_train/distributed_lookup_table_design.md b/doc/fluid/design/dist_train/distributed_lookup_table_design.md index 988729138926f035750b59eb245dde82502a3ad2..97f890c88e778a59ea475e984ccbc28cf026fc5b 100644 --- a/doc/fluid/design/dist_train/distributed_lookup_table_design.md +++ b/doc/fluid/design/dist_train/distributed_lookup_table_design.md @@ -119,6 +119,32 @@ optimization algorithm $f$ runs on the storage service. - Con: the storage service needs to be able to run the optimization algorithm. +## Distributed Sparse Table in Fluid + +For another design, we can implement a distributed sparse table in Fluid, +and don't need to maintain an external storage component while training. + +You may need to read Fluid [Distributed Training Architecture](./distributed_architecture.md) +and [Parameter Server](./parameter_server.md) before going on. + +![fluid lookup remote table](./src/fluid_lookup_remote_table.png) + +Partition a large table into multiple pserver instances +1. `DistributeTranspiler` would split the table partitioned into some small +table blocks with some partitioned algorithms such as +[RoundRobin](https://en.wikipedia.org/wiki/Round-robin_scheduling), +[Hash](https://en.wikipedia.org/wiki/Hash) and etc... +1. For some cases, the range of input `Ids` is very wide and unpredictable, so the sparse +table would be able to fill a new value for the id that didn't appear before with +zero, uniform random or Gaussian distribution. + +For each Trainer's training process: +1. In the forward pass, we use `pre-fetch` op to pre-fetch parameter blocks according to the +input `Ids` from PServers instead of the local `lookup_table` op, and then merge the blocks +into a parameter `W`. +1. Compute `GRAD@W'` in the backward pass using the pre-fetched `W` and send it to PServer to +execute the optimize pass. + ## Conclusion Let us do the "storage service does not optimize" solution first, as a diff --git a/doc/fluid/design/dist_train/src/fluid_lookup_remote_table.graffle b/doc/fluid/design/dist_train/src/fluid_lookup_remote_table.graffle new file mode 100644 index 0000000000000000000000000000000000000000..96ca6d48f43bd9f49c6861dab006e2037873db87 Binary files /dev/null and b/doc/fluid/design/dist_train/src/fluid_lookup_remote_table.graffle differ diff --git a/doc/fluid/design/dist_train/src/fluid_lookup_remote_table.png b/doc/fluid/design/dist_train/src/fluid_lookup_remote_table.png new file mode 100644 index 0000000000000000000000000000000000000000..afa25ab3b4e427bc595a855b12ab966478e01ed0 Binary files /dev/null and b/doc/fluid/design/dist_train/src/fluid_lookup_remote_table.png differ diff --git a/doc/fluid/design/dist_train/src/ncc2_design.graffle b/doc/fluid/design/dist_train/src/ncc2_design.graffle new file mode 100644 index 0000000000000000000000000000000000000000..7d2753bbb03bc28c7a0054bb0aa424deb072ffbf Binary files /dev/null and b/doc/fluid/design/dist_train/src/ncc2_design.graffle differ diff --git a/doc/fluid/design/dist_train/src/ncc2_design.png b/doc/fluid/design/dist_train/src/ncc2_design.png new file mode 100644 index 0000000000000000000000000000000000000000..da0d5ee81f5dfeb4ca1356601b0bb5870456e3d6 Binary files /dev/null and b/doc/fluid/design/dist_train/src/ncc2_design.png differ diff --git a/paddle/contrib/inference/paddle_inference_api_impl.h b/paddle/contrib/inference/paddle_inference_api_impl.h index ba266b608da342fb71faf05d02ddf74330e21e98..f9ec6f55449fc46b4a44b9563980cb5f8e80a951 100644 --- a/paddle/contrib/inference/paddle_inference_api_impl.h +++ b/paddle/contrib/inference/paddle_inference_api_impl.h @@ -22,9 +22,9 @@ #include "paddle/contrib/inference/paddle_inference_api.h" #include "paddle/fluid/framework/ddim.h" -#include "paddle/fluid/framework/init.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/inference/io.h" +#include "paddle/fluid/platform/init.h" #include "paddle/fluid/platform/profiler.h" namespace paddle { diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 6286dda4a54991b7a1042aed9886fdcb694198ba..397c9f739452e5130dad28a763b92cf76720ec61 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -21,10 +21,10 @@ endif() cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) -nv_test(mixed_vector_test SRCS mixed_vector_test.cu DEPS place memory device_context init) +nv_test(mixed_vector_test SRCS mixed_vector_test.cu DEPS place memory device_context tensor) cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto recordio) cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory) -nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor init) +nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor) cc_library(reader SRCS reader.cc DEPS lod_tensor ddim) @@ -38,7 +38,7 @@ cc_test(scope_test SRCS scope_test.cc DEPS scope) cc_library(data_device_transform SRCS data_device_transform.cc DEPS tensor) nv_test(data_device_transform_test SRCS data_device_transform_test.cu - DEPS operator op_registry init math_function) + DEPS operator op_registry device_context math_function) if(WITH_GPU) nv_library(data_type_transform SRCS data_type_transform.cu DEPS tensor) @@ -63,7 +63,7 @@ cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto) cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context) cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog shape_inference data_transform lod_tensor profiler) -cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry init) +cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_context) cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog) cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc) @@ -101,14 +101,14 @@ cc_test(var_type_inference_test SRCS var_type_inference_test.cc DEPS op_registry cc_library(selected_rows SRCS selected_rows.cc DEPS tensor) cc_test(selected_rows_test SRCS selected_rows_test.cc DEPS selected_rows) -cc_library(init SRCS init.cc DEPS gflags device_context place stringpiece operator) -cc_test(init_test SRCS init_test.cc DEPS init) - cc_test(op_kernel_type_test SRCS op_kernel_type_test.cc DEPS place device_context framework_proto) cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc) # cc_test(channel_test SRCS channel_test.cc) cc_test(tuple_test SRCS tuple_test.cc ) -cc_test(concurrency_test SRCS concurrency_test.cc DEPS go_op channel_close_op channel_create_op - channel_send_op channel_recv_op sum_op select_op elementwise_add_op compare_op - conditional_block_op while_op assign_op print_op executor proto_desc) + +# disable test temporarily. +# TODO https://github.com/PaddlePaddle/Paddle/issues/11971 +# cc_test(concurrency_test SRCS concurrency_test.cc DEPS go_op channel_close_op channel_create_op +# channel_send_op channel_recv_op sum_op select_op elementwise_add_op compare_op +# conditional_block_op while_op assign_op print_op executor proto_desc) diff --git a/paddle/fluid/framework/data_device_transform_test.cu b/paddle/fluid/framework/data_device_transform_test.cu index a91fe5c99d397ef1bf04f6d22e988b6d3f33e500..f2c55e533a2747325b1b16fdada37945a8ed3c42 100644 --- a/paddle/fluid/framework/data_device_transform_test.cu +++ b/paddle/fluid/framework/data_device_transform_test.cu @@ -14,13 +14,13 @@ limitations under the License. */ #include "gtest/gtest.h" -#include "paddle/fluid/framework/init.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/elementwise_op_function.h" #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/platform/init.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/lod_tensor_test.cu b/paddle/fluid/framework/lod_tensor_test.cu index e3efbe4c464493af87e33510647d8c67d457a76d..b9950627ca378cb9607681799bd7fe5bfce2bf50 100644 --- a/paddle/fluid/framework/lod_tensor_test.cu +++ b/paddle/fluid/framework/lod_tensor_test.cu @@ -17,9 +17,9 @@ #include #include "gtest/gtest.h" -#include "paddle/fluid/framework/init.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/platform/assert.h" +#include "paddle/fluid/platform/init.h" #include "paddle/fluid/platform/place.h" __global__ void test(size_t* a, int size) { diff --git a/paddle/fluid/framework/operator_test.cc b/paddle/fluid/framework/operator_test.cc index 74043b5d7990178976baf2fad991ae03f9c8dd25..e211c678f8d61ddb69b6c612338bfc6a0afe8cd7 100644 --- a/paddle/fluid/framework/operator_test.cc +++ b/paddle/fluid/framework/operator_test.cc @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "gtest/gtest.h" -#include "paddle/fluid/framework/init.h" #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/platform/init.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index 7071eea19c355c04711a11c224985be96c6589f4..1895aea7f98cb1ad12b2ce16545339252349ea37 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -1,4 +1,4 @@ -set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor init) +set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor ) # TODO(panyx0718): Should this be called paddle_fluid_inference_api_internal? cc_library(paddle_fluid_api diff --git a/paddle/fluid/inference/analysis/README.md b/paddle/fluid/inference/analysis/README.md index 6fd73958bc480fe3983b9622c03ac77fba9ec8a7..70adb4a974cc5f9911cb302840bbef7ec2591505 100644 --- a/paddle/fluid/inference/analysis/README.md +++ b/paddle/fluid/inference/analysis/README.md @@ -54,4 +54,5 @@ It can be used as a helper class that draws the modified graph after each pass. There is some helper legacy/function/class for analysis. - [dot.h](./dot.h) give a easy to use interface for generating `DOT` codes, -- [graph_traits.h](./graph_traits.h) contains the graph traversal algorithms, it uses `iterator` to make the algorithms easy to share across different passes. +- [graph_traits.h](./graph_traits.h) contains the interfaces of the graph traversal algorithms, it uses `iterator`to make the algorithms easy to share across different passes, +there are some implementations in [data_flow_graph.cc](./data_flow_graph.cc) , such as BFS and DFS.. diff --git a/paddle/fluid/inference/analysis/pass.h b/paddle/fluid/inference/analysis/pass.h index 25c566ebfa41abe3a247bc6c6e5583c8620a6abb..6b4dbb3bb5ddd9f15f26758beef1d1b5bbf49142 100644 --- a/paddle/fluid/inference/analysis/pass.h +++ b/paddle/fluid/inference/analysis/pass.h @@ -32,19 +32,6 @@ class Pass { public: Pass() = default; virtual ~Pass() = default; - // Virtual method overridden by subclasses to do only necessary initialization - // before any pass is run. - // virtual bool Initialize() { return false; } - // There is some passes such as FlowToDataFlowGraphPass that needs a - // ProgramDesc. Here use the native ProgramDesc ProtoBuf message, so that it - // only couple with the proto file. - // virtual bool Initialize(const framework::proto::ProgramDesc &desc) { return - // false; } - // There are some Passes such as DataFlowGraphToFluidPass that will output a - // ProgramDesc. - // virtual bool Initialize(framework::proto::ProgramDesc *desc) { return - // false; } - // Mutable Pass. virtual bool Initialize(Argument *argument) { return false; } // Readonly Pass. diff --git a/paddle/fluid/inference/io.cc b/paddle/fluid/inference/io.cc index 6b03ac7119b117e442e6af34c719c8a4f736bde9..181868977dd8f2568486ed0c4e1f260a69795896 100644 --- a/paddle/fluid/inference/io.cc +++ b/paddle/fluid/inference/io.cc @@ -20,7 +20,7 @@ limitations under the License. */ #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/blas.h" +#include "paddle/fluid/platform/cpu_helper.h" #include "paddle/fluid/pybind/pybind.h" DEFINE_string(devices, "", "The devices to be used which is joined by comma."); @@ -33,7 +33,7 @@ namespace inference { void Init(const std::vector argv) { framework::InitGflags(argv); - operators::math::SetNumThreads(FLAGS_math_num_threads); + platform::SetNumThreads(FLAGS_math_num_threads); // init devices std::vector devices; std::string token; diff --git a/paddle/fluid/inference/io.h b/paddle/fluid/inference/io.h index caf599b1a68783f155cd134c2a29e9ffa49a0895..01b50b3670cb9da2e0be232a61ea6129dd83aa20 100644 --- a/paddle/fluid/inference/io.h +++ b/paddle/fluid/inference/io.h @@ -18,9 +18,9 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/executor.h" -#include "paddle/fluid/framework/init.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/platform/init.h" namespace paddle { namespace inference { diff --git a/paddle/fluid/inference/tests/book/test_inference_nlp.cc b/paddle/fluid/inference/tests/book/test_inference_nlp.cc index 03b0b6946339772ac535b3471d50fbd74554239d..5cc1db12bb71e428d493e7c6f718b1c6ed431858 100644 --- a/paddle/fluid/inference/tests/book/test_inference_nlp.cc +++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc @@ -19,7 +19,7 @@ limitations under the License. */ #include "gflags/gflags.h" #include "gtest/gtest.h" #include "paddle/fluid/inference/tests/test_helper.h" -#include "paddle/fluid/operators/math/blas.h" +#include "paddle/fluid/platform/cpu_helper.h" #ifdef PADDLE_WITH_MKLML #include #endif @@ -164,7 +164,7 @@ TEST(inference, nlp) { // only use 1 thread number per std::thread omp_set_dynamic(0); omp_set_num_threads(1); - paddle::operators::math::SetNumThreads(1); + paddle::platform::SetNumThreads(1); #endif double start_ms = 0, stop_ms = 0; diff --git a/paddle/fluid/operators/distributed/CMakeLists.txt b/paddle/fluid/operators/distributed/CMakeLists.txt index 312f80e09077f21a47985c1c936c2ac41c292ead..675ca36774beb72cc1e9b136ad0b18ce061689ac 100644 --- a/paddle/fluid/operators/distributed/CMakeLists.txt +++ b/paddle/fluid/operators/distributed/CMakeLists.txt @@ -5,7 +5,7 @@ if(WITH_GRPC) set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") set_source_files_properties(grpc_serde_test.cc rpc_server_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) cc_test(serde_test SRCS grpc_serde_test.cc variable_response.cc DEPS grpc++_unsecure grpc_unsecure gpr - cares zlib protobuf sendrecvop_grpc SERIAL) + cares zlib protobuf sendrecvop_grpc scope profiler math_function SERIAL) cc_test(grpc_server_test SRCS rpc_server_test.cc DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib protobuf executor proto_desc lookup_table_op SERIAL) diff --git a/paddle/fluid/operators/math/CMakeLists.txt b/paddle/fluid/operators/math/CMakeLists.txt index 53a478c1ac0bdf8c0a3f3721161779ef10cb14f8..5571ff9a7151c1f971ad1805bf001815a651202b 100644 --- a/paddle/fluid/operators/math/CMakeLists.txt +++ b/paddle/fluid/operators/math/CMakeLists.txt @@ -54,13 +54,13 @@ math_library(softmax DEPS math_function) math_library(unpooling) math_library(vol2col) -cc_test(math_function_test SRCS math_function_test.cc) +cc_test(math_function_test SRCS math_function_test.cc DEPS math_function) cc_test(selected_rows_functor_test SRCS selected_rows_functor_test.cc DEPS selected_rows_functor) cc_test(im2col_test SRCS im2col_test.cc DEPS im2col) cc_test(vol2col_test SRCS vol2col_test.cc DEPS vol2col) cc_test(sequence_padding_test SRCS sequence_padding_test.cc DEPS sequence_padding) if(WITH_GPU) - nv_test(math_function_gpu_test SRCS math_function_test.cu) - nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu DEPS selected_rows_functor) + nv_test(math_function_gpu_test SRCS math_function_test.cu DEPS math_function) + nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu DEPS selected_rows_functor math_function) endif() cc_test(concat_test SRCS concat_test.cc DEPS concat) diff --git a/paddle/fluid/operators/math/blas.h b/paddle/fluid/operators/math/blas.h index a907d6a71b7a16983e601073b039b48406853a0b..9f6c1e5c35f02cd4bc729eea78b17fac017aa90e 100644 --- a/paddle/fluid/operators/math/blas.h +++ b/paddle/fluid/operators/math/blas.h @@ -23,41 +23,12 @@ #ifdef PADDLE_USE_OPENBLAS #include -#ifdef LAPACK_FOUND -#include -#endif -#endif - -#ifndef LAPACK_FOUND -extern "C" { -#include // NOLINT -int LAPACKE_sgetrf(int matrix_layout, int m, int n, float* a, int lda, - int* ipiv); -int LAPACKE_dgetrf(int matrix_layout, int m, int n, double* a, int lda, - int* ipiv); -int LAPACKE_sgetri(int matrix_layout, int n, float* a, int lda, - const int* ipiv); -int LAPACKE_dgetri(int matrix_layout, int n, double* a, int lda, - const int* ipiv); -} #endif namespace paddle { namespace operators { namespace math { -static void SetNumThreads(int num_threads) { -#ifdef PADDLE_USE_OPENBLAS - int real_num_threads = num_threads > 1 ? num_threads : 1; - openblas_set_num_threads(real_num_threads); -#elif defined(PADDLE_WITH_MKLML) - int real_num_threads = num_threads > 1 ? num_threads : 1; - platform::dynload::MKL_Set_Num_Threads(real_num_threads); -#else - PADDLE_ENFORCE(false, "To be implemented."); -#endif -} - /** * Matrix Descriptor of a memory buffer. * diff --git a/paddle/fluid/operators/math/math_function.h b/paddle/fluid/operators/math/math_function.h index 56a039d3cec7375517573c9429801945bf99741e..7ec78d9ef8e7ff966674b043c017f2fbedb77bb9 100644 --- a/paddle/fluid/operators/math/math_function.h +++ b/paddle/fluid/operators/math/math_function.h @@ -19,23 +19,6 @@ limitations under the License. */ #ifdef PADDLE_USE_OPENBLAS #include -#ifdef LAPACK_FOUND -#include -#endif -#endif - -#ifndef LAPACK_FOUND -extern "C" { -#include // NOLINT -int LAPACKE_sgetrf(int matrix_layout, int m, int n, float* a, int lda, - int* ipiv); -int LAPACKE_dgetrf(int matrix_layout, int m, int n, double* a, int lda, - int* ipiv); -int LAPACKE_sgetri(int matrix_layout, int n, float* a, int lda, - const int* ipiv); -int LAPACKE_dgetri(int matrix_layout, int n, double* a, int lda, - const int* ipiv); -} #endif #include diff --git a/paddle/fluid/operators/nccl_op_test.cu.cc b/paddle/fluid/operators/nccl_op_test.cu.cc index ef54d79fdf2becde98c68044d14bd4347773b975..d5fb7a12e5d9757f3e639f6de7f0129bd531e2a1 100644 --- a/paddle/fluid/operators/nccl_op_test.cu.cc +++ b/paddle/fluid/operators/nccl_op_test.cu.cc @@ -19,7 +19,6 @@ limitations under the License. */ #include // NOLINT #include -#include "paddle/fluid/framework/init.h" #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/program_desc.h" @@ -27,6 +26,7 @@ limitations under the License. */ #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/gpu_info.h" +#include "paddle/fluid/platform/init.h" #include "paddle/fluid/platform/place.h" USE_NO_KERNEL_OP(ncclInit); diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index b29035bafd34fa81dc6b59691142fe74439202b8..20037d0764056c2a093af801c9cc1eb788dd46d6 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -28,6 +28,9 @@ cc_test(place_test SRCS place_test.cc DEPS place glog gflags) add_subdirectory(dynload) +cc_library(cpu_helper SRCS cpu_helper.cc DEPS cblas enforce) +cc_test(cpu_helper_test SRCS cpu_helper_test.cc DEPS cpu_helper) + IF(WITH_GPU) set(GPU_CTX_DEPS dynload_cuda dynamic_loader) ELSE() @@ -42,10 +45,12 @@ ENDIF() # memcpy depends on device_context, here add deps individually for # avoiding cycle dependencies -cc_library(device_context SRCS device_context.cc DEPS malloc - place eigen3 ${GPU_CTX_DEPS} ${MKLDNN_CTX_DEPS}) +cc_library(device_context SRCS device_context.cc init.cc DEPS malloc + place eigen3 stringpiece cpu_helper ${GPU_CTX_DEPS} ${MKLDNN_CTX_DEPS}) nv_test(device_context_test SRCS device_context_test.cu DEPS device_context gpu_info) +cc_test(init_test SRCS init_test.cc DEPS device_context) + nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda) nv_test(transform_test SRCS transform_test.cu DEPS memory place device_context) @@ -53,5 +58,5 @@ cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framewo cc_library(profiler SRCS profiler.cc DEPS device_context device_tracer) cc_test(profiler_test SRCS profiler_test.cc DEPS profiler) -nv_test(float16_gpu_test SRCS float16_test.cu) -cc_test(float16_test SRCS float16_test.cc) +nv_test(float16_gpu_test SRCS float16_test.cu DEPS lod_tensor) +cc_test(float16_test SRCS float16_test.cc DEPS lod_tensor) diff --git a/paddle/fluid/platform/cpu_helper.cc b/paddle/fluid/platform/cpu_helper.cc new file mode 100644 index 0000000000000000000000000000000000000000..77ecb170111d63f23312d06fa8a8172bc45f2a4e --- /dev/null +++ b/paddle/fluid/platform/cpu_helper.cc @@ -0,0 +1,42 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/platform/cpu_helper.h" +#include "paddle/fluid/platform/enforce.h" + +#ifdef PADDLE_WITH_MKLML +#include "paddle/fluid/platform/dynload/mklml.h" +#endif + +#ifdef PADDLE_USE_OPENBLAS +#include +#endif + +namespace paddle { +namespace platform { + +void SetNumThreads(int num_threads) { +#ifdef PADDLE_USE_OPENBLAS + int real_num_threads = num_threads > 1 ? num_threads : 1; + openblas_set_num_threads(real_num_threads); +#elif defined(PADDLE_WITH_MKLML) + int real_num_threads = num_threads > 1 ? num_threads : 1; + platform::dynload::MKL_Set_Num_Threads(real_num_threads); +#else + PADDLE_ENFORCE(false, "To be implemented."); +#endif +} + +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/cpu_helper.h b/paddle/fluid/platform/cpu_helper.h new file mode 100644 index 0000000000000000000000000000000000000000..78fc392b632ef92d4ae08de2051041fc0bf6778b --- /dev/null +++ b/paddle/fluid/platform/cpu_helper.h @@ -0,0 +1,26 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include + +namespace paddle { +namespace platform { + +//! Set the number of threads in use. +void SetNumThreads(int num_threads); + +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/cpu_helper_test.cc b/paddle/fluid/platform/cpu_helper_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..dc1b2b56cd98ca6259c46a76231dbc99482970c1 --- /dev/null +++ b/paddle/fluid/platform/cpu_helper_test.cc @@ -0,0 +1,22 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/platform/cpu_helper.h" + +#include "gtest/gtest.h" + +TEST(CpuHelper, SetNumThread) { + paddle::platform::SetNumThreads(1); + paddle::platform::SetNumThreads(4); +} diff --git a/paddle/fluid/platform/device_context_test.cu b/paddle/fluid/platform/device_context_test.cu index fa806aba6d8747beebc3eed2c661b326dd62fd76..171d2979a0218ad5e22112190a59866b3e0b617f 100644 --- a/paddle/fluid/platform/device_context_test.cu +++ b/paddle/fluid/platform/device_context_test.cu @@ -69,19 +69,3 @@ TEST(Device, DeviceContextPool) { ASSERT_NE(dev_ctx, nullptr); } } - -int main(int argc, char** argv) { - std::vector places; - - places.emplace_back(paddle::platform::CPUPlace()); - int count = paddle::platform::GetCUDADeviceCount(); - for (int i = 0; i < count; ++i) { - places.emplace_back(paddle::platform::CUDAPlace(i)); - } - - VLOG(0) << " DeviceCount " << count; - paddle::platform::DeviceContextPool::Init(places); - - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/paddle/fluid/platform/dynload/dynamic_loader.cc b/paddle/fluid/platform/dynload/dynamic_loader.cc index 198d8566b1bd726c5b33d8af22a19cb30a280fa2..93bf7c13516ffa4baca6a30f1daf946939726d85 100644 --- a/paddle/fluid/platform/dynload/dynamic_loader.cc +++ b/paddle/fluid/platform/dynload/dynamic_loader.cc @@ -36,8 +36,6 @@ DEFINE_string(cuda_dir, "", DEFINE_string(warpctc_dir, "", "Specify path for loading libwarpctc.so."); -DEFINE_string(lapack_dir, "", "Specify path for loading liblapack.so."); - DEFINE_string(nccl_dir, "", "Specify path for loading nccl library, such as libcublas, " "libcurand. For instance, /usr/local/cuda/lib64. If default, " @@ -189,14 +187,6 @@ void* GetWarpCTCDsoHandle() { #endif } -void* GetLapackDsoHandle() { -#if defined(__APPLE__) || defined(__OSX__) - return GetDsoHandleFromSearchPath(FLAGS_lapack_dir, "liblapacke.dylib"); -#else - return GetDsoHandleFromSearchPath(FLAGS_lapack_dir, "liblapacke.so"); -#endif -} - void* GetNCCLDsoHandle() { #if defined(__APPLE__) || defined(__OSX__) return GetDsoHandleFromSearchPath(FLAGS_nccl_dir, "libnccl.dylib"); diff --git a/paddle/fluid/platform/dynload/dynamic_loader.h b/paddle/fluid/platform/dynload/dynamic_loader.h index ca87dc47f355a8a4fc840262044413414edf00a0..84fd2ce9987628a5ed29e4125a03dedb96e416c1 100644 --- a/paddle/fluid/platform/dynload/dynamic_loader.h +++ b/paddle/fluid/platform/dynload/dynamic_loader.h @@ -23,7 +23,6 @@ void* GetCUDNNDsoHandle(); void* GetCUPTIDsoHandle(); void* GetCurandDsoHandle(); void* GetWarpCTCDsoHandle(); -void* GetLapackDsoHandle(); void* GetNCCLDsoHandle(); void* GetTensorRtDsoHandle(); void* GetMKLMLDsoHandle(); diff --git a/paddle/fluid/platform/float16_test.cc b/paddle/fluid/platform/float16_test.cc index a589e32b61a9b6a44bdc4529eee715d987d6922c..ede294be1e2e26693bd3ead2ccd5e6a6c8a075bc 100644 --- a/paddle/fluid/platform/float16_test.cc +++ b/paddle/fluid/platform/float16_test.cc @@ -13,8 +13,8 @@ limitations under the License. */ #include #include "gtest/gtest.h" -#include "paddle/fluid/framework/init.h" #include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/platform/init.h" namespace paddle { namespace platform { diff --git a/paddle/fluid/framework/init.cc b/paddle/fluid/platform/init.cc similarity index 96% rename from paddle/fluid/framework/init.cc rename to paddle/fluid/platform/init.cc index a1094976f6c0965ac0a601d7e37575969146fdab..0b776528414735e8a7c1e3763e7ccb662bb9f285 100644 --- a/paddle/fluid/framework/init.cc +++ b/paddle/fluid/platform/init.cc @@ -16,10 +16,10 @@ limitations under the License. */ #include #include -#include "paddle/fluid/framework/init.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/math/blas.h" +#include "paddle/fluid/platform/cpu_helper.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/platform/init.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/string/piece.h" @@ -115,7 +115,7 @@ void InitDevices(bool init_p2p, const std::vector devices) { places.emplace_back(platform::CPUPlace()); platform::DeviceContextPool::Init(places); #ifndef PADDLE_WITH_MKLDNN - operators::math::SetNumThreads(1); + platform::SetNumThreads(1); #endif } diff --git a/paddle/fluid/framework/init.h b/paddle/fluid/platform/init.h similarity index 100% rename from paddle/fluid/framework/init.h rename to paddle/fluid/platform/init.h diff --git a/paddle/fluid/framework/init_test.cc b/paddle/fluid/platform/init_test.cc similarity index 96% rename from paddle/fluid/framework/init_test.cc rename to paddle/fluid/platform/init_test.cc index 928e2d14abea604cf483f4bc1e1c58fbae04dd21..eef1470a90c7da15efff965fc8f66dfa616ba25f 100644 --- a/paddle/fluid/framework/init_test.cc +++ b/paddle/fluid/platform/init_test.cc @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "gtest/gtest.h" -#include "paddle/fluid/framework/init.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/platform/init.h" TEST(InitDevices, CPU) { using paddle::framework::InitDevices; diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 4fef351c2118e43697606c90a616cd870e78cd77..89ca4f781273e99bbb83216c238dfc5c88c0a22b 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -2,13 +2,13 @@ if(WITH_PYTHON) if(WITH_AMD_GPU) hip_library(paddle_pybind SHARED SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc - DEPS pybind python proto_desc memory executor prune init profiler feed_fetch_method + DEPS pybind python proto_desc memory executor prune profiler feed_fetch_method parallel_executor ${GLOB_OP_LIB}) else() cc_library(paddle_pybind SHARED SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc - DEPS pybind python proto_desc memory executor prune init profiler feed_fetch_method + DEPS pybind python proto_desc memory executor prune profiler feed_fetch_method parallel_executor ${GLOB_OP_LIB}) if(NOT APPLE AND NOT ANDROID) diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 3191f29fc3e5d2914e4b68be9e94ccc0d05f8f93..7a8bb712452538b7e2a349d56a15de3284f82b39 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -24,7 +24,6 @@ limitations under the License. */ #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/feed_fetch_method.h" #include "paddle/fluid/framework/framework.pb.h" -#include "paddle/fluid/framework/init.h" #include "paddle/fluid/framework/lod_rank_table.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor_array.h" @@ -36,6 +35,7 @@ limitations under the License. */ #include "paddle/fluid/operators/activation_op.h" #include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h" #include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/init.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/pybind/const_value.h" diff --git a/paddle/fluid/string/printf.h b/paddle/fluid/string/printf.h index 062095a1c3e977c0bcc89346ead765acb023bcf7..47de23377398423dabf3b0ed5b670e564f57cdfb 100644 --- a/paddle/fluid/string/printf.h +++ b/paddle/fluid/string/printf.h @@ -83,6 +83,13 @@ void Fprintf(std::ostream& out, const char* fmt, const Args&... args) { tinyformat::vformat(out, fmt, tinyformat::makeFormatList(args...)); } +template +std::string Sprintf(const Args&... args) { + std::ostringstream oss; + Fprintf(oss, ""); + return oss.str(); +} + template std::string Sprintf(const char* fmt, const Args&... args) { std::ostringstream oss; diff --git a/paddle/fluid/string/printf_test.cc b/paddle/fluid/string/printf_test.cc index 678029f93534ab374bd29083f8991d632ccdd5a1..544b12ef3a877a6e84c136433799301edaa4abdf 100644 --- a/paddle/fluid/string/printf_test.cc +++ b/paddle/fluid/string/printf_test.cc @@ -27,4 +27,5 @@ TEST(StringPrintf, StringPrintf) { EXPECT_EQ(std::string("Wednesday, July 27, 14:44"), paddle::string::Sprintf("%s, %s %d, %.2d:%.2d", weekday, month, day, hour, min)); + EXPECT_EQ(std::string(""), paddle::string::Sprintf()); } diff --git a/paddle/fluid/train/demo/demo_trainer.cc b/paddle/fluid/train/demo/demo_trainer.cc index 813d8386868558bd62a9d5670d540ddeddb2b77d..4425f062efa6eab552caee1a429746528cd66926 100644 --- a/paddle/fluid/train/demo/demo_trainer.cc +++ b/paddle/fluid/train/demo/demo_trainer.cc @@ -15,11 +15,11 @@ #include #include "paddle/fluid/framework/executor.h" -#include "paddle/fluid/framework/init.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/platform/init.h" #include "paddle/fluid/platform/place.h" namespace paddle { diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 09bbe4185e1709d379a819a94a712820ac1e9d89..d173b41e86f61954954b6a5ea9957d2e172deca0 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -318,7 +318,7 @@ function assert_api_not_changed() { virtualenv .env source .env/bin/activate pip install ${PADDLE_ROOT}/build/python/dist/*whl - curl ${PADDLE_API_SPEC_URL:-https://raw.githubusercontent.com/reyoung/FluidAPISpec/master/API.spec} \ + curl ${PADDLE_API_SPEC_URL:-https://raw.githubusercontent.com/PaddlePaddle/FluidAPISpec/master/API.spec} \ > origin.spec python ${PADDLE_ROOT}/tools/print_signatures.py paddle.fluid > new.spec python ${PADDLE_ROOT}/tools/diff_api.py origin.spec new.spec diff --git a/paddle/testing/CMakeLists.txt b/paddle/testing/CMakeLists.txt index a1f446817e0cbc1b4391398a82b0846d01bbec2c..22644818994134d4797edfae8d156a005c103d52 100644 --- a/paddle/testing/CMakeLists.txt +++ b/paddle/testing/CMakeLists.txt @@ -6,6 +6,6 @@ if(WITH_TESTING) add_library(paddle_test_util STATIC TestUtil.cpp) add_dependencies(paddle_test_util paddle_proto ${external_project_dependencies}) if(NOT MOBILE_INFERENCE) - cc_library(paddle_gtest_main SRCS paddle_gtest_main.cc DEPS init memory gtest gflags) + cc_library(paddle_gtest_main SRCS paddle_gtest_main.cc DEPS device_context memory gtest gflags) endif() endif() diff --git a/paddle/testing/paddle_gtest_main.cc b/paddle/testing/paddle_gtest_main.cc index 555be3d00e2dc467eec45210cc997779827ed69f..cfea2059c3ce20fb44732d990e9708ad6f8d81a1 100644 --- a/paddle/testing/paddle_gtest_main.cc +++ b/paddle/testing/paddle_gtest_main.cc @@ -16,8 +16,8 @@ limitations under the License. */ #include "gflags/gflags.h" #include "gtest/gtest.h" -#include "paddle/fluid/framework/init.h" #include "paddle/fluid/memory/memory.h" +#include "paddle/fluid/platform/init.h" int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py index 05fed72ee6471ba42007b5a9f09f89148ac27a30..53d6ca86a008f798af2854a154cce8b7242d2f35 100644 --- a/python/paddle/fluid/transpiler/distribute_transpiler.py +++ b/python/paddle/fluid/transpiler/distribute_transpiler.py @@ -309,10 +309,10 @@ class DistributeTranspiler(object): def get_pserver_program(self, endpoint): """ Get parameter server side program. - + Args: endpoint (str): current parameter server endpoint. - + Returns: Program: the program for current parameter server to run. """ @@ -516,7 +516,7 @@ class DistributeTranspiler(object): endpoint (str): current pserver endpoint. pserver_program (Program): call get_pserver_program first and pass the result here. - + Returns: Program: parameter server side startup program. """ @@ -552,10 +552,10 @@ class DistributeTranspiler(object): op_on_pserver = True new_outputs[key] = pserver_vars[op.output(key)[0]] - # most startup program ops have no inputs - new_inputs = self._get_input_map_from_op(pserver_vars, op) - if op_on_pserver: + # most startup program ops have no inputs + new_inputs = self._get_input_map_from_op(pserver_vars, op) + if op.type in [ "gaussian_random", "fill_constant", "uniform_random" ]: diff --git a/python/paddle/fluid/transpiler/inference_transpiler.py b/python/paddle/fluid/transpiler/inference_transpiler.py index d32c69d148dfa1633ce344611ca3fe7879a234e9..b8afeae5ebd6ef7948a7c0c2775f419af461da04 100644 --- a/python/paddle/fluid/transpiler/inference_transpiler.py +++ b/python/paddle/fluid/transpiler/inference_transpiler.py @@ -19,7 +19,7 @@ from ..framework import Program from ..executor import global_scope -class InferenceTranspiler: +class InferenceTranspiler(object): ''' Convert the fluid program to optimized inference program. diff --git a/python/paddle/libs/__init__.py b/python/paddle/libs/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..34d4f4d07ed0d452c1965c5f1f198230571931aa --- /dev/null +++ b/python/paddle/libs/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# used for setup.py.in to store the thirdparty shared libraries diff --git a/python/setup.py.in b/python/setup.py.in index 51380149d0b09224c02050902897f23f53600de2..5506443733650631fe045be3f701a41519352e8d 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -1,5 +1,7 @@ from setuptools import setup, Distribution, Extension import subprocess +import shutil +import os class BinaryDistribution(Distribution): def has_ext_modules(foo): return True @@ -62,6 +64,7 @@ write_version_py(filename='@PADDLE_BINARY_DIR@/python/paddle/version.py') packages=['paddle', + 'paddle.libs', 'paddle.utils', 'paddle.dataset', 'paddle.reader', @@ -113,12 +116,35 @@ package_dir={ } if '${WITH_FLUID_ONLY}'== 'OFF': package_dir['py_paddle']='${PADDLE_BINARY_DIR}/python/py_paddle' - -paddle_rt_lib_dir = 'lib' -paddle_rt_libs = ['${WARPCTC_LIBRARIES}'] -if '${MKL_SHARED_LIBS}'!= '': - paddle_rt_libs += '${MKL_SHARED_LIBS}'.split(';') +# put all thirdparty libraries in paddle.libs +package_data['paddle.libs']=['libwarpctc.so'] +libs_path='${PADDLE_BINARY_DIR}/python/paddle/libs' +shutil.copy('${WARPCTC_LIBRARIES}', libs_path) +if '${WITH_MKL}' == 'ON': + shutil.copy('${MKLML_LIB}', libs_path) + shutil.copy('${MKLML_IOMP_LIB}', libs_path) + package_data['paddle.libs']+=['libmklml_intel.so','libiomp5.so'] +if '${WITH_MKLDNN}' == 'ON': + # change rpath of libmkldnn.so.0, add $ORIGIN/ to it. + # The reason is that all thirdparty libraries in the same directory, + # thus, libmkldnn.so.0 will find libmklml_intel.so and libiomp5.so. + command = "patchelf --set-rpath '$ORIGIN/' ${MKLDNN_SHARED_LIB}" + if os.system(command) != 0: + raise Exception("patchelf --set-rpath for libmkldnn.so.0 fails") + package_data['paddle.libs']+=['libmkldnn.so.0'] + shutil.copy('${MKLDNN_SHARED_LIB}', libs_path) +# remove unused paddle/libs/__init__.py +os.remove(libs_path+'/__init__.py') +package_dir['paddle.libs']=libs_path + +# change rpath of core.so, add $ORIGIN/../libs/ to it. +# The reason is that libwarpctc.so, libiomp5.so etc are in paddle.libs, and +# core.so is in paddle.fluid, thus paddle/fluid/../libs will pointer to above libraries. +# This operation will fix https://github.com/PaddlePaddle/Paddle/issues/3213 +command = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/core.so" +if os.system(command) != 0: + raise Exception("patchelf --set-rpath for core.so fails") setup(name='${PACKAGE_NAME}', version='${PADDLE_VERSION}', @@ -128,6 +154,5 @@ setup(name='${PACKAGE_NAME}', ext_modules=[Extension('_foo', ['stub.cc'])], package_data=package_data, package_dir=package_dir, - scripts=paddle_bins, - data_files=[(paddle_rt_lib_dir, paddle_rt_libs)] + scripts=paddle_bins )