diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index f7f726b2bd6cb8916d450a189f1bea27ec1d8b05..6166a3e5a084364f18eef65d4e4219c235416bae 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -26,6 +26,7 @@ paddle.fluid.Variable.gradient (ArgSpec(args=['self'], varargs=None, keywords=No
 paddle.fluid.Variable.numpy (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '7536e8feb56d827875943e7f01d406fc'))
 paddle.fluid.Variable.set_value (ArgSpec(args=['self', 'value'], varargs=None, keywords=None, defaults=None), ('document', 'c424b9e763ff51c38a6917f98026fe7d'))
 paddle.fluid.Variable.to_string (ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,)), ('document', '31f359a2c074f26dc0ffff296fc3983f'))
+paddle.fluid.load_op_library (ArgSpec(args=['lib_filename'], varargs=None, keywords=None, defaults=None), ('document', 'c009b2ea5fb6520f2d2f53aafec788e0'))
 paddle.fluid.Executor ('paddle.fluid.executor.Executor', ('document', '34e8c1769313fbeff7817212dda6259e'))
 paddle.fluid.Executor.__init__ (ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.Executor.close (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '3a584496aa1343f36eebf3c46b323a74'))
diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index 86de21d65a19715cc895a174df85d74c713219a3..75bf7e64c548c81bb3b0ce12f1fd8c6309e1a5a2 100644
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -246,3 +246,35 @@ message(STATUS "commit: ${PADDLE_COMMIT}")
 message(STATUS "branch: ${PADDLE_BRANCH}")
 
 configure_file(commit.h.in commit.h)
+
+set(FLUID_FRAMEWORK_MODULES proto_desc memory lod_tensor executor data_feed_proto)
+if(WIN32)
+  sep_library(paddle_framework DEPS ${FLUID_FRAMEWORK_MODULES})
+else(WIN32)
+  cc_library(paddle_framework DEPS ${FLUID_FRAMEWORK_MODULES})
+endif(WIN32)
+
+cc_library(paddle_framework_shared
+  SHARED SRCS executor.cc operator.cc ${CMAKE_CURRENT_SOURCE_DIR}/c/c_api.cc
+  DEPS ${FLUID_FRAMEWORK_MODULES})
+get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
+set_target_properties(paddle_framework_shared PROPERTIES OUTPUT_NAME paddle_framework)
+target_link_libraries(paddle_framework_shared ${os_dependency_modules})
+
+if (LINUX)
+  set(FLUID_FRAMEWORK_SHARED_LIB
+    ${PADDLE_BINARY_DIR}/paddle/fluid/framework/libpaddle_framework.so
+    CACHE INTERNAL "Fluid framework lib")
+endif()
+
+if (WIN32)
+  set(FLUID_FRAMEWORK_SHARED_LIB
+    ${PADDLE_BINARY_DIR}/paddle/fluid/framework/libpaddle_framework.dll
+    CACHE INTERNAL "Fluid framework lib")
+endif()
+
+if(APPLE)
+  set(FLUID_FRAMEWORK_SHARED_LIB
+    ${PADDLE_BINARY_DIR}/paddle/fluid/framework/libpaddle_framework.dylib
+    CACHE INTERNAL "Fluid framework lib")
+endif()
diff --git a/paddle/fluid/framework/c/c_api.cc b/paddle/fluid/framework/c/c_api.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ab987fb56686594f505e63b6664c2176e5a4ad89
--- /dev/null
+++ b/paddle/fluid/framework/c/c_api.cc
@@ -0,0 +1,58 @@
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "paddle/fluid/framework/block_desc.h"
+#include "paddle/fluid/framework/c/c_api.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/platform/init.h"
+
+extern "C" {
+
+paddle::framework::OpInfoMap &PD_GetOpInfoMap() {
+  return paddle::framework::OpInfoMap::Instance();
+}
+
+void PD_InitDevicesPool(paddle::platform::DeviceContextPool *pool) {
+  paddle::platform::DeviceContextPool::SetPool(pool);
+}
+
+std::vector<std::string> PD_GetGradOpDescStrs(
+    const paddle::framework::OpDesc &op_desc,
+    const std::unordered_set<std::string> &no_grad_set,
+    std::unordered_map<std::string, std::string> *grad_to_var,
+    const std::vector<paddle::framework::BlockDesc *> &grad_block) {
+  auto &op_info = PD_GetOpInfoMap().Get(op_desc.Type());
+  std::vector<std::string> ret;
+  if (op_info.grad_op_maker_) {
+    auto grad_op_descs =
+        op_info.grad_op_maker_(op_desc, no_grad_set, grad_to_var, grad_block);
+    size_t op_num = grad_op_descs.size();
+    ret.resize(op_num);
+    for (size_t i = 0; i < op_num; ++i) {
+      PADDLE_ENFORCE_EQ(
+          grad_op_descs[i]->Proto()->SerializePartialToString(&ret[i]), true,
+          "Cannot serialize message.");
+    }
+  }
+  return ret;
+}
+
+}  // end extern "C"
diff --git a/paddle/fluid/framework/c/c_api.h b/paddle/fluid/framework/c/c_api.h
new file mode 100644
index 0000000000000000000000000000000000000000..04dbfbebe5d7646e93678fff3418c4eed6bc691a
--- /dev/null
+++ b/paddle/fluid/framework/c/c_api.h
@@ -0,0 +1,46 @@
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "paddle/fluid/framework/block_desc.h"
+#include "paddle/fluid/framework/op_desc.h"
+#include "paddle/fluid/framework/op_info.h"
+#include "paddle/fluid/platform/device_context.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// C-API to get the global OpInfo map.
+paddle::framework::OpInfoMap &PD_GetOpInfoMap();
+
+// C-API to initialize the global DeviceContextPool from outside.
+void PD_InitDevicesPool(paddle::platform::DeviceContextPool *pool);
+
+// C-API to serialize the grad op protocol message to a binary string.
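+// (The descs cross the shared-library boundary as serialized strings rather
+// than as protobuf objects, so no protobuf message allocated in the plugin
+// is destructed by the main process; see the note in load_op_lib.h.)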
+std::vector<std::string> PD_GetGradOpDescStrs(
+    const paddle::framework::OpDesc &op_desc,
+    const std::unordered_set<std::string> &no_grad_set,
+    std::unordered_map<std::string, std::string> *grad_to_var,
+    const std::vector<paddle::framework::BlockDesc *> &grad_block);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/paddle/fluid/framework/load_op_lib.h b/paddle/fluid/framework/load_op_lib.h
new file mode 100644
index 0000000000000000000000000000000000000000..1f9a56a2f6891119ec23f83a6cc99f00912f2e71
--- /dev/null
+++ b/paddle/fluid/framework/load_op_lib.h
@@ -0,0 +1,115 @@
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "paddle/fluid/framework/op_desc.h"
+#include "paddle/fluid/platform/dynload/dynamic_loader.h"
+#include "paddle/fluid/platform/port.h"
+
+namespace paddle {
+namespace framework {
+
+template <typename T>
+T *DynLoad(void *handle, std::string name) {
+  T *func = reinterpret_cast<T *>(dlsym(handle, name.c_str()));
+#if !defined(_WIN32)
+  auto errorno = dlerror();
+#else
+  auto errorno = GetLastError();
+#endif  // !_WIN32
+  PADDLE_ENFORCE_NOT_NULL(func, errorno);
+  return func;
+}
+
+void LoadOpLib(const std::string &dso_name) {
+  void *handle = paddle::platform::dynload::GetOpDsoHandle(dso_name);
+
+  typedef OpInfoMap &get_op_info_t();
+  get_op_info_t *get_op_info =
+      DynLoad<get_op_info_t>(handle, "PD_GetOpInfoMap");
+  auto &op_info = get_op_info();
+  auto *dyn_info_map = op_info.mutable_map();
+
+  typedef std::vector<std::string> grad_op_desc_maker_t(
+      const OpDesc &, const std::unordered_set<std::string> &,
+      std::unordered_map<std::string, std::string> *,
+      const std::vector<BlockDesc *> &);
+
+  grad_op_desc_maker_t *grad_op_desc_maker =
+      DynLoad<grad_op_desc_maker_t>(handle, "PD_GetGradOpDescStrs");
+
+  auto &info_map = OpInfoMap::Instance();
+  for (const auto &n : *(dyn_info_map)) {
+    auto type = n.first;
+    if (type == "recurrent" || type == "recurrent_grad" ||
+        type == "conditional_block" || type == "conditional_block_grad") {
+      continue;
+    }
+    if (info_map.Has(n.first)) {
+      PADDLE_THROW("Op %s has been registered.", type);
+    }
+    OpInfo info;
+    info.creator_ = n.second.creator_;
+
+    // If we get the protocol buffer object from the dynamic library
+    // directly, there will be a destruction error:
+    //   ** Error in `python`: free(): invalid pointer:
+    //   ... paddle::framework::proto::OpDesc::SharedDtor()
+    // It seems to be a bug in protobuf, see
+    // https://github.com/protocolbuffers/protobuf/issues/435
+    // So we get the serialized binary string from the dynamic library,
+    // then deserialize it to a protocol buffer.
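+    // The lambda below captures the dlsym-ed maker function; each serialized
+    // string it returns is parsed back into a fresh OpDesc owned by this
+    // process.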
+    info.grad_op_maker_ = [grad_op_desc_maker](
+        const OpDesc &op_desc,
+        const std::unordered_set<std::string> &no_grad_set,
+        std::unordered_map<std::string, std::string> *grad_to_var,
+        const std::vector<BlockDesc *> &grad_block) {
+      std::vector<std::string> strs =
+          grad_op_desc_maker(op_desc, no_grad_set, grad_to_var, grad_block);
+      std::vector<std::unique_ptr<OpDesc>> ret;
+      for (auto &str : strs) {
+        proto::OpDesc proto_desc;
+        PADDLE_ENFORCE_EQ(proto_desc.ParseFromString(str), true,
+                          "Failed to parse OpDesc from string");
+        ret.emplace_back(new OpDesc(proto_desc, nullptr));
+      }
+      return ret;
+    };
+    info.proto_ = n.second.proto_;
+    info.checker_ = n.second.checker_;
+    info.infer_var_type_ = n.second.infer_var_type_;
+    info.infer_shape_ = n.second.infer_shape_;
+    info.infer_inplace_ = n.second.infer_inplace_;
+    info.infer_no_need_buffer_vars_ = n.second.infer_no_need_buffer_vars_;
+    info.use_default_grad_op_desc_maker_ =
+        n.second.use_default_grad_op_desc_maker_;
+
+    info_map.Insert(type, info);
+  }
+
+  typedef void init_device_t(platform::DeviceContextPool *);
+  init_device_t *init_dev =
+      DynLoad<init_device_t>(handle, "PD_InitDevicesPool");
+  init_dev(&(platform::DeviceContextPool::Instance()));
+}
+
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h
index 3504f62b7bdaa523deb2ae2074cf0d22cfe93851..4fec2e9350f800d5414f8f2e71f9ef70d578fb25 100644
--- a/paddle/fluid/platform/device_context.h
+++ b/paddle/fluid/platform/device_context.h
@@ -331,6 +331,8 @@ class DeviceContextPool {
     return *pool;
   }
 
+  static void SetPool(DeviceContextPool* dev_pool) { pool = dev_pool; }
+
   /*! \brief  Return handle of single device context. */
   platform::DeviceContext* Get(const platform::Place& place);
 
diff --git a/paddle/fluid/platform/dynload/dynamic_loader.cc b/paddle/fluid/platform/dynload/dynamic_loader.cc
index 4a1cd5a8db7fa9b8f8fdd9427c7a26e5c90cc95f..48848bd84fe8bff72a2744f9c56034772f03c6c2 100644
--- a/paddle/fluid/platform/dynload/dynamic_loader.cc
+++ b/paddle/fluid/platform/dynload/dynamic_loader.cc
@@ -46,6 +46,8 @@ DEFINE_string(
 
 DEFINE_string(mklml_dir, "", "Specify path for loading libmklml_intel.so.");
 
+DEFINE_string(op_dir, "", "Specify path for loading user-defined op library.");
+
 namespace paddle {
 namespace platform {
 namespace dynload {
@@ -280,6 +282,16 @@ void* GetMKLMLDsoHandle() {
 #endif
 }
 
+void* GetOpDsoHandle(const std::string& dso_name) {
+#if defined(__APPLE__) || defined(__OSX__)
+  PADDLE_THROW("Custom op libraries are not supported on Apple.");
+#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
+  PADDLE_THROW("Custom op libraries are not supported on Windows.");
+#else
+  return GetDsoHandleFromSearchPath(FLAGS_op_dir, dso_name);
+#endif
+}
+
 }  // namespace dynload
 }  // namespace platform
 }  // namespace paddle
diff --git a/paddle/fluid/platform/dynload/dynamic_loader.h b/paddle/fluid/platform/dynload/dynamic_loader.h
index df101474aa4e158198baf92ca389d23239ba6f47..4940411ccf7ed8a98cf667f36a6841224a50460c 100644
--- a/paddle/fluid/platform/dynload/dynamic_loader.h
+++ b/paddle/fluid/platform/dynload/dynamic_loader.h
@@ -35,6 +35,7 @@ void* GetWarpCTCDsoHandle();
 void* GetNCCLDsoHandle();
 void* GetTensorRtDsoHandle();
 void* GetMKLMLDsoHandle();
+void* GetOpDsoHandle(const std::string& dso_name);
 void SetPaddleLibPath(const std::string&);
 
 }  // namespace dynload
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index d96d59edfb03212e698b8ed47e17342b2013a6f3..f6e56b4f8a0f23fbe1d4b12ea32d9078aeff1c5d 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -29,6 +29,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/garbage_collector.h"
 #include "paddle/fluid/framework/ir/coalesce_grad_tensor_pass.h"
 #include "paddle/fluid/framework/ir/pass_builder.h"
+#include "paddle/fluid/framework/load_op_lib.h"
 #include "paddle/fluid/framework/lod_rank_table.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/lod_tensor_array.h"
@@ -1053,6 +1054,7 @@ All parameter, weight, gradient are variables in Paddle.
   m.def("init_gflags", framework::InitGflags);
   m.def("init_glog", framework::InitGLOG);
   m.def("init_dgc", framework::InitDGC);
+  m.def("load_op_library", framework::LoadOpLib);
   m.def("init_devices",
         [](bool init_p2p) { framework::InitDevices(init_p2p); });
 
diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py
index fccc4bb09951328cc5a827bdf861f5ab714feb5f..925fd55876b29374e96b4a598f8b551fc49bae2b 100644
--- a/python/paddle/__init__.py
+++ b/python/paddle/__init__.py
@@ -32,3 +32,4 @@ import paddle.batch
 import paddle.compat
 import paddle.distributed
 batch = batch.batch
+import paddle.sysconfig
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index e82103772213ccba70600749fab8b5eddc7eb2dd..d4abfeca44be483cca4d6a94bc6edac09b80a6ee 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -47,6 +47,7 @@ __all__ = [
     'in_dygraph_mode',
     'is_compiled_with_cuda',
     'Variable',
+    'load_op_library',
 ]
 
 EMPTY_VAR_NAME = core.kEmptyVarName()
@@ -1300,6 +1301,12 @@ class OpProtoHolder(object):
             raise ValueError("Operator \"%s\" has not been registered." % type)
         return self.op_proto_map[type]
 
+    def update_op_proto(self):
+        op_protos = get_all_op_protos()
+        for proto in op_protos:
+            if proto.type not in self.op_proto_map:
+                self.op_proto_map[proto.type] = proto
+
     @staticmethod
     def generated_op_attr_names():
         return {
@@ -4327,3 +4334,25 @@ def _dygraph_place_guard(place):
         yield
 
     _dygraph_current_expected_place_ = tmp_place
+
+
+def load_op_library(lib_filename):
+    """
+    Load a dynamic library, including custom operators and kernels.
+    Once the library is loaded, the ops and kernels registered in it
+    become available in the PaddlePaddle main process.
+    Note that a custom operator can't have the same type as an
+    existing operator already registered in the framework.
+
+    Args:
+        lib_filename (str): name of the dynamic library.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle.fluid as fluid
+            #fluid.load_op_library('custom_op.so')
+
+    """
+    core.load_op_library(lib_filename)
+    OpProtoHolder.instance().update_op_proto()
diff --git a/python/paddle/fluid/layer_helper.py b/python/paddle/fluid/layer_helper.py
index 11e3c4938bef4a3c97a724798e2f7273c25f06ed..19822e410c71aa993e2d90a92c57c3522023ad81 100644
--- a/python/paddle/fluid/layer_helper.py
+++ b/python/paddle/fluid/layer_helper.py
@@ -17,7 +17,7 @@ from __future__ import print_function
 
 import copy
 import six
-from .framework import Parameter, dtype_is_floating, in_dygraph_mode
+from .framework import Parameter, dtype_is_floating, in_dygraph_mode, OpProtoHolder
 from . import unique_name
 from paddle.fluid.initializer import Constant, Xavier
 from .param_attr import ParamAttr
diff --git a/python/paddle/fluid/tests/CMakeLists.txt b/python/paddle/fluid/tests/CMakeLists.txt
index 8cfd026f8ff8e044ffbd2cc76c34843072261ab1..41cf3cfa3743b610bb489cdb2292fcf7dd6878d9 100644
--- a/python/paddle/fluid/tests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/CMakeLists.txt
@@ -11,3 +11,7 @@ endforeach()
 
 add_subdirectory(unittests)
 add_subdirectory(book)
+
+if(NOT APPLE AND NOT WIN32)
+    add_subdirectory(custom_op)
+endif()
diff --git a/python/paddle/fluid/tests/custom_op/CMakeLists.txt b/python/paddle/fluid/tests/custom_op/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5209c742b5c72b4c0e5de67493cf0d75f0b7b379
--- /dev/null
+++ b/python/paddle/fluid/tests/custom_op/CMakeLists.txt
@@ -0,0 +1,25 @@
+if (WITH_GPU)
+  nv_library(relu_op_shared SHARED SRCS relu_op.cc relu_op.cu DEPS paddle_framework_shared)
+else()
+  cc_library(relu_op_shared SHARED SRCS relu_op.cc DEPS paddle_framework_shared)
+endif()
+set_target_properties(relu_op_shared PROPERTIES OUTPUT_NAME relu2_op)
+target_link_libraries(relu_op_shared ${FLUID_FRAMEWORK_SHARED_LIB})
+
+# Remove the linked glog and gflags when compiling relu_op_shared;
+# otherwise there is a runtime error:
+#   ERROR: something wrong with flag 'logtostderr' in file
+#   'third_party/glog/src/extern_glog/src/logging.cc'.
+#   One possibility: file 'third_party/glog/src/extern_glog/src/logging.cc'
+#   is being linked both statically and dynamically into this executable.
+get_target_property(TARGET_LIBRARIES relu_op_shared LINK_LIBRARIES)
+LIST(REMOVE_ITEM TARGET_LIBRARIES glog)
+LIST(REMOVE_ITEM TARGET_LIBRARIES gflags)
+set_property(TARGET relu_op_shared PROPERTY LINK_LIBRARIES ${TARGET_LIBRARIES})
+
+file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
+string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
+
+foreach(src ${TEST_OPS})
+    py_test(${src} SRCS ${src}.py)
+endforeach()
diff --git a/python/paddle/fluid/tests/custom_op/relu_op.cc b/python/paddle/fluid/tests/custom_op/relu_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f1a3e29b1163ec0d09e4031142a9891578808890
--- /dev/null
+++ b/python/paddle/fluid/tests/custom_op/relu_op.cc
@@ -0,0 +1,112 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+class Relu2Op : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    auto in_dims = ctx->GetInputDim("X");
+    ctx->SetOutputDim("Y", in_dims);
+  }
+};
+
+class Relu2OpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput("X", "The input tensor.");
+    AddOutput("Y", "Output of the relu2 op.");
+    AddComment(R"DOC(
+Relu2 Operator.
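+
+Computes Y = max(0, X) element-wise; functionally the same as the built-in
+relu, registered under a separate type name to exercise op library loading.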
+)DOC"); + } +}; + +class Relu2GradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + auto in_dims = ctx->GetInputDim(framework::GradVarName("Y")); + ctx->SetOutputDim(framework::GradVarName("X"), in_dims); + } +}; + +class Relu2GradMaker : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + std::unique_ptr Apply() const override { + auto* op = new framework::OpDesc(); + op->SetType("relu2_grad"); + op->SetInput("Y", Output("Y")); + op->SetInput(framework::GradVarName("Y"), OutputGrad("Y")); + op->SetAttrMap(Attrs()); + op->SetOutput(framework::GradVarName("X"), InputGrad("X")); + return std::unique_ptr(op); + } +}; + +using Tensor = framework::Tensor; + +template +class Relu2Kernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* in_t = ctx.Input("X"); + auto* out_t = ctx.Output("Y"); + auto x = in_t->data(); + auto y = out_t->mutable_data(ctx.GetPlace()); + for (int i = 0; i < in_t->numel(); ++i) { + y[i] = std::max(static_cast(0.), x[i]); + } + } +}; + +template +class Relu2GradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* dy_t = ctx.Input(framework::GradVarName("Y")); + auto* y_t = ctx.Input("Y"); + auto* dx_t = ctx.Output(framework::GradVarName("X")); + + auto dy = dy_t->data(); + auto y = y_t->data(); + auto dx = dx_t->mutable_data(ctx.GetPlace()); + + for (int i = 0; i < y_t->numel(); ++i) { + dx[i] = dy[i] * (y[i] > static_cast(0) ? 1. : 0.); + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +using CPU = paddle::platform::CPUDeviceContext; +REGISTER_OPERATOR(relu2, ops::Relu2Op, ops::Relu2OpMaker, ops::Relu2GradMaker); +REGISTER_OPERATOR(relu2_grad, ops::Relu2GradOp); +REGISTER_OP_CPU_KERNEL(relu2, + ops::Relu2Kernel, + ops::Relu2Kernel); +REGISTER_OP_CPU_KERNEL(relu2_grad, + ops::Relu2GradKernel, + ops::Relu2GradKernel); diff --git a/python/paddle/fluid/tests/custom_op/relu_op.cu b/python/paddle/fluid/tests/custom_op/relu_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..53ad75e413d92aef61e5bfe6348581e6428bbc3a --- /dev/null +++ b/python/paddle/fluid/tests/custom_op/relu_op.cu @@ -0,0 +1,87 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "paddle/fluid/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+
+template <typename T>
+__global__ void KeRelu2(const T* x, const int num, T* y) {
+  int gid = blockIdx.x * blockDim.x + threadIdx.x;
+  for (int i = gid; i < num; i += blockDim.x * gridDim.x) {
+    y[i] = max(x[i], static_cast<T>(0.));
+  }
+}
+
+template <typename DeviceContext, typename T>
+class Relu2CUDAKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* in_t = ctx.Input<Tensor>("X");
+    auto* out_t = ctx.Output<Tensor>("Y");
+    auto x = in_t->data<T>();
+    auto y = out_t->mutable_data<T>(ctx.GetPlace());
+
+    auto& dev_ctx = ctx.template device_context<DeviceContext>();
+
+    int num = in_t->numel();
+    int block = 512;
+    int grid = (num + block - 1) / block;
+    KeRelu2<T><<<grid, block, 0, dev_ctx.stream()>>>(x, num, y);
+  }
+};
+
+template <typename T>
+__global__ void KeRelu2Grad(const T* y, const T* dy, const int num, T* dx) {
+  int gid = blockIdx.x * blockDim.x + threadIdx.x;
+  for (int i = gid; i < num; i += blockDim.x * gridDim.x) {
+    dx[i] = dy[i] * (y[i] > 0 ? 1. : 0.);
+  }
+}
+
+template <typename DeviceContext, typename T>
+class Relu2GradCUDAKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* dy_t = ctx.Input<Tensor>(framework::GradVarName("Y"));
+    auto* y_t = ctx.Input<Tensor>("Y");
+    auto* dx_t = ctx.Output<Tensor>(framework::GradVarName("X"));
+
+    auto dy = dy_t->data<T>();
+    auto y = y_t->data<T>();
+    auto dx = dx_t->mutable_data<T>(ctx.GetPlace());
+
+    auto& dev_ctx = ctx.template device_context<DeviceContext>();
+
+    int num = dy_t->numel();
+    int block = 512;
+    int grid = (num + block - 1) / block;
+    KeRelu2Grad<T><<<grid, block, 0, dev_ctx.stream()>>>(y, dy, num, dx);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+using CUDA = paddle::platform::CUDADeviceContext;
+REGISTER_OP_CUDA_KERNEL(relu2,
+                        paddle::operators::Relu2CUDAKernel<CUDA, float>,
+                        paddle::operators::Relu2CUDAKernel<CUDA, double>);
+
+REGISTER_OP_CUDA_KERNEL(relu2_grad,
+                        paddle::operators::Relu2GradCUDAKernel<CUDA, float>,
+                        paddle::operators::Relu2GradCUDAKernel<CUDA, double>);
diff --git a/python/paddle/fluid/tests/custom_op/test_custom_op.py b/python/paddle/fluid/tests/custom_op/test_custom_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..0d02da53d66d3a0ad3160f130153f013db92e1c9
--- /dev/null
+++ b/python/paddle/fluid/tests/custom_op/test_custom_op.py
@@ -0,0 +1,112 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
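+#
+# Loads the compiled librelu2_op.so via fluid.load_op_library and checks
+# that a small network using the custom relu2 op trains to the same loss
+# as one using the built-in fluid.layers.relu.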
+
+import os
+import sys
+import numpy as np
+import unittest
+import contextlib
+
+import paddle
+import paddle.fluid as fluid
+
+file_dir = os.path.dirname(os.path.abspath(__file__))
+fluid.load_op_library(os.path.join(file_dir, 'librelu2_op.so'))
+
+from paddle.fluid.layer_helper import LayerHelper
+
+
+def relu2(x, name=None):
+    helper = LayerHelper("relu2", **locals())
+    out = helper.create_variable(
+        type=x.type, name=name, dtype=x.dtype, persistable=False)
+    helper.append_op(type="relu2", inputs={"X": x}, outputs={"Y": out})
+    return out
+
+
+@contextlib.contextmanager
+def scope_prog_guard():
+    prog = fluid.Program()
+    startup_prog = fluid.Program()
+    scope = fluid.core.Scope()
+    with fluid.scope_guard(scope):
+        with fluid.program_guard(prog, startup_prog):
+            yield
+
+
+def linear_fc(data, label, use_custom_relu):
+    hidden = fluid.layers.fc(data, size=128)
+    hidden = relu2(hidden) if use_custom_relu else fluid.layers.relu(hidden)
+    hidden = fluid.layers.fc(hidden, size=128)
+    hidden = fluid.layers.fc(hidden, size=10, act='softmax')
+    loss = fluid.layers.cross_entropy(input=hidden, label=label)
+    loss = fluid.layers.mean(loss)
+    return loss
+
+
+def custom_op_test(use_gpu=True, use_custom_relu=True):
+    with scope_prog_guard():
+        np.random.seed(0)
+        fluid.default_startup_program().random_seed = 10
+        fluid.default_main_program().random_seed = 10
+
+        data = fluid.layers.data(
+            name='data', shape=[1, 28, 28], dtype='float32')
+        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+        loss = linear_fc(data, label, use_custom_relu)
+
+        optimizer = fluid.optimizer.Momentum(learning_rate=0.1, momentum=0.9)
+        optimizer.minimize(loss)
+
+        place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+        exe = fluid.Executor(place)
+        exe.run(fluid.default_startup_program())
+
+        compile_program = fluid.compiler.CompiledProgram(
+            fluid.default_main_program()).with_data_parallel(
+                loss_name=loss.name)
+
+        reader = paddle.batch(paddle.dataset.mnist.train(), batch_size=32)
+        feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
+
+        num = 4
+        for i, data in enumerate(reader()):
+            outs, = exe.run(compile_program,
+                            feed=feeder.feed(data),
+                            fetch_list=[loss])
+            if i == num:
+                break
+        return outs
+
+
+class CustomOpTest(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        os.environ['CPU_NUM'] = str(2)
+
+    def test_cpu(self):
+        actual = custom_op_test(False, True)
+        expect = custom_op_test(False, False)
+        self.assertTrue(np.allclose(actual, expect))
+
+    def test_gpu(self):
+        if not fluid.core.is_compiled_with_cuda():
+            return
+        actual = custom_op_test(True, True)
+        expect = custom_op_test(True, False)
+        self.assertTrue(np.allclose(actual, expect))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/fluid/tests/custom_op/test_sysconfig.py b/python/paddle/fluid/tests/custom_op/test_sysconfig.py
new file mode 100644
index 0000000000000000000000000000000000000000..78c0cdf0316ea901d613d6ff34f58504d3f493b0
--- /dev/null
+++ b/python/paddle/fluid/tests/custom_op/test_sysconfig.py
@@ -0,0 +1,35 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import unittest
+import paddle
+
+
+class SysConfigTest(unittest.TestCase):
+    def test_include(self):
+        inc_dir = paddle.sysconfig.get_include()
+        inc_dirs = inc_dir.split(os.sep)
+        self.assertEqual(inc_dirs[-1], 'include')
+        self.assertEqual(inc_dirs[-2], 'paddle')
+
+    def test_libs(self):
+        lib_dir = paddle.sysconfig.get_lib()
+        lib_dirs = lib_dir.split(os.sep)
+        self.assertEqual(lib_dirs[-1], 'libs')
+        self.assertEqual(lib_dirs[-2], 'paddle')
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/sysconfig.py b/python/paddle/sysconfig.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ce327c76961ad2febc020ed1a2595b7aad459a0
--- /dev/null
+++ b/python/paddle/sysconfig.py
@@ -0,0 +1,51 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+__all__ = ['get_include', 'get_lib']
+
+
+def get_include():
+    """
+    Get the directory containing the PaddlePaddle C++ header files.
+
+    Returns:
+        The directory as a string.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            include_dir = paddle.sysconfig.get_include()
+
+    """
+    import paddle
+    return os.path.join(os.path.dirname(paddle.__file__), 'include')
+
+
+def get_lib():
+    """
+    Get the directory containing libpaddle_framework.
+
+    Returns:
+        The directory as a string.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            lib_dir = paddle.sysconfig.get_lib()
+
+    """
+    import paddle
+    return os.path.join(os.path.dirname(paddle.__file__), 'libs')
diff --git a/python/setup.py.in b/python/setup.py.in
index 3288b6152c0f62978184b791c738a9e39919708e..e0bfd72fac5ce46afaca3ab0f9c8a3c322df7ee6 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -1,9 +1,15 @@
-from setuptools import setup, Distribution, Extension
 import subprocess
 import os
 import re
 import shutil
 import sys
+import fnmatch
+
+from setuptools import Command
+from setuptools import setup, Distribution, Extension
+from setuptools.command.install import install as InstallCommandBase
+
+
 class BinaryDistribution(Distribution):
     def has_ext_modules(foo):
         return True
@@ -220,11 +226,19 @@ if '${WITH_NGRAPH}' == 'ON':
     package_data['paddle.libs']+=['${NGRAPH_SHARED_LIB_NAME}',
                                   '${NGRAPH_CPU_LIB_NAME}',
                                   '${NGRAPH_TBB_LIB_NAME}']
+
+# copy libpaddle_framework.so to libs
+if os.name != 'nt' and sys.platform != 'darwin':
+    paddle_framework_lib='${FLUID_FRAMEWORK_SHARED_LIB}'
+    shutil.copy(paddle_framework_lib, libs_path)
+    package_data['paddle.libs'] += [('libpaddle_framework' if os.name != 'nt' else 'paddle_framework') + ext_name]
+
 # remove unused paddle/libs/__init__.py
 if os.path.isfile(libs_path+'/__init__.py'):
     os.remove(libs_path+'/__init__.py')
 package_dir['paddle.libs']=libs_path
+
 # change rpath of ${FLUID_CORE_NAME}.ext, add $ORIGIN/../libs/ to it.
 # The reason is that libwarpctc.ext, libiomp5.ext etc are in paddle.libs, and
 # ${FLUID_CORE_NAME}.ext is in paddle.fluid, thus paddle/fluid/../libs will pointer to above libraries.
@@ -250,6 +264,93 @@ if os.name == 'nt':
 elif sys.platform == 'darwin':
     ext_modules = []
 
+def find_files(pattern, root):
+    for dirpath, _, files in os.walk(root):
+        for filename in fnmatch.filter(files, pattern):
+            yield os.path.join(dirpath, filename)
+
+headers = (
+    list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/framework')) +
+    list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/memory')) +
+    list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/platform')) +
+    list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/string')) +
+    list(find_files('*.pb.h', '${PADDLE_BINARY_DIR}/paddle/fluid/framework')) +
+    ['${EIGEN_INCLUDE_DIR}/Eigen/Core'] +  # eigen
+    list(find_files('*', '${EIGEN_INCLUDE_DIR}/Eigen/src')) +  # eigen
+    list(find_files('*', '${EIGEN_INCLUDE_DIR}/unsupported/Eigen')) +  # eigen
+    list(find_files('*', '${GFLAGS_INSTALL_DIR}/include')) +  # gflags
+    list(find_files('*', '${GLOG_INSTALL_DIR}/include')) +  # glog
+    list(find_files('*', '${BOOST_INCLUDE_DIR}/boost')) +  # boost
+    list(find_files('*', '${XXHASH_INSTALL_DIR}/include')) +  # xxhash
+    list(find_files('*', '${PROTOBUF_INCLUDE_DIR}')) +  # protobuf
+    list(find_files('*.h', '${THREADPOOL_INCLUDE_DIR}')))  # threadpool
+
+
+class InstallCommand(InstallCommandBase):
+    def finalize_options(self):
+        ret = InstallCommandBase.finalize_options(self)
+        self.install_headers = os.path.join(self.install_purelib, 'paddle',
+                                            'include')
+        self.install_lib = self.install_platlib
+        return ret
+
+
+class InstallHeaders(Command):
+    """Override how headers are copied.
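+
+    Headers from the source tree, generated *.pb.h files from the build
+    tree, and third_party headers are all copied under paddle/include,
+    preserving their relative directory layout.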
+ """ + description = 'install C/C++ header files' + + user_options = [('install-dir=', 'd', + 'directory to install header files to'), + ('force', 'f', + 'force installation (overwrite existing files)'), + ] + + boolean_options = ['force'] + + def initialize_options(self): + self.install_dir = None + self.force = 0 + self.outfiles = [] + + def finalize_options(self): + self.set_undefined_options('install', + ('install_headers', 'install_dir'), + ('force', 'force')) + + def mkdir_and_copy_file(self, header): + if 'pb.h' in header: + install_dir = re.sub('${PADDLE_BINARY_DIR}/', '', header) + elif 'third_party' not in header: + # framework + install_dir = re.sub('@PADDLE_SOURCE_DIR@/', '', header) + else: + # third_party + install_dir = re.sub('${THIRD_PARTY_PATH}', 'third_party', header) + install_dir = re.sub('src/extern_eigen3/', '', install_dir) + install_dir = re.sub('src/extern_boost/', '', install_dir) + install_dir = os.path.join(self.install_dir, os.path.dirname(install_dir)) + if not os.path.exists(install_dir): + self.mkpath(install_dir) + return self.copy_file(header, install_dir) + + def run(self): + if os.name == 'nt' or sys.platform == 'darwin': + return + hdrs = self.distribution.headers + if not hdrs: + return + self.mkpath(self.install_dir) + for header in hdrs: + (out, _) = self.mkdir_and_copy_file(header) + self.outfiles.append(out) + + def get_inputs(self): + return self.distribution.headers or [] + + def get_outputs(self): + return self.outfiles + setup(name='${PACKAGE_NAME}', version='${PADDLE_VERSION}', description='Parallel Distributed Deep Learning', @@ -259,5 +360,10 @@ setup(name='${PACKAGE_NAME}', package_data=package_data, package_dir=package_dir, scripts=paddle_bins, - distclass=BinaryDistribution + distclass=BinaryDistribution, + headers=headers, + cmdclass={ + 'install_headers': InstallHeaders, + 'install': InstallCommand, + } )