diff --git a/paddle/fluid/extension/include/op_meta_info.h b/paddle/fluid/extension/include/op_meta_info.h index 1bc044f647fbae0c4666ecda9e2a2fc3dc8ef214..9c8d9fa40f13d75af2eea970d18efc1132b3f210 100644 --- a/paddle/fluid/extension/include/op_meta_info.h +++ b/paddle/fluid/extension/include/op_meta_info.h @@ -38,6 +38,8 @@ class PD_DLL_DECL OpMetaInfoHelper; using Tensor = paddle::Tensor; +///////////////// Util Marco Define //////////////// + #define PD_DISABLE_COPY_AND_ASSIGN(classname) \ private: \ classname(const classname&) = delete; \ @@ -65,6 +67,12 @@ using Tensor = paddle::Tensor; END_HANDLE_THE_ERROR \ } while (0) +#define STATIC_ASSERT_GLOBAL_NAMESPACE(uniq_name, msg) \ + struct __test_global_namespace_##uniq_name##__ {}; \ + static_assert(std::is_same<::__test_global_namespace_##uniq_name##__, \ + __test_global_namespace_##uniq_name##__>::value, \ + msg) + ///////////////// Util Define and Function //////////////// inline std::string Grad(const std::string& var_name) { @@ -288,9 +296,9 @@ class PD_DLL_DECL OpMetaInfo { std::vector attrs_; // 2. 
func info - KernelFunc kernel_fn_; - InferShapeFunc infer_shape_fn_; - InferDtypeFunc infer_dtype_fn_; + KernelFunc kernel_fn_{nullptr}; + InferShapeFunc infer_shape_fn_{nullptr}; + InferDtypeFunc infer_dtype_fn_{nullptr}; }; //////////////// Op Meta Info Map ///////////////// @@ -321,20 +329,22 @@ class PD_DLL_DECL OpMetaInfoMap { class PD_DLL_DECL OpMetaInfoBuilder { public: - explicit OpMetaInfoBuilder(std::string&& name); + explicit OpMetaInfoBuilder(std::string&& name, size_t index); OpMetaInfoBuilder& Inputs(std::vector&& inputs); OpMetaInfoBuilder& Outputs(std::vector&& outputs); OpMetaInfoBuilder& Attrs(std::vector&& attrs); OpMetaInfoBuilder& SetKernelFn(KernelFunc func); OpMetaInfoBuilder& SetInferShapeFn(InferShapeFunc func); OpMetaInfoBuilder& SetInferDtypeFn(InferDtypeFunc func); - OpMetaInfoBuilder& SetBackwardOp(const std::string& bwd_op_name); private: // Forward Op name std::string name_; - // Point to the currently constructed op meta info + // ref current info ptr OpMetaInfo* info_ptr_; + // The current op meta info index in vector + // - 0: op, 1: grad_op, 2: grad_grad_op + size_t index_; }; /////////////////////// Op register API ///////////////////////// @@ -350,14 +360,25 @@ void LoadCustomOperatorLib(const std::string& dso_name); /////////////////////// Op register Macro ///////////////////////// -#define PD_BUILD_OP_WITH_COUNTER(op_name, counter) \ - static ::paddle::OpMetaInfoBuilder __op_meta_info_##counter##__ = \ - ::paddle::OpMetaInfoBuilder(op_name) - -#define PD_BUILD_OP_INNER(op_name, counter) \ - PD_BUILD_OP_WITH_COUNTER(op_name, counter) - -#define PD_BUILD_OP(op_name) PD_BUILD_OP_INNER(op_name, __COUNTER__) +#define PD_BUILD_OP(op_name) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_op__##op_name, "PD_BUILD_OP must be called in global namespace."); \ + static ::paddle::OpMetaInfoBuilder __op_meta_info_##op_name##__ = \ + ::paddle::OpMetaInfoBuilder(#op_name, 0) + +#define PD_BUILD_GRAD_OP(op_name) \ + 
STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_grad_op__##op_name, \ + "PD_BUILD_GRAD_OP must be called in global namespace."); \ + static ::paddle::OpMetaInfoBuilder __grad_op_meta_info_##op_name##__ = \ + ::paddle::OpMetaInfoBuilder(#op_name, 1) + +#define PD_BUILD_DOUBLE_GRAD_OP(op_name) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_grad_grad_op__##op_name, \ + "PD_BUILD_DOUBLE_GRAD_OP must be called in global namespace."); \ + static ::paddle::OpMetaInfoBuilder __grad_grad_op_meta_info_##op_name##__ = \ + ::paddle::OpMetaInfoBuilder(#op_name, 2) } // namespace paddle diff --git a/paddle/fluid/extension/src/op_meta_info.cc b/paddle/fluid/extension/src/op_meta_info.cc index d362282b8d9d24c287e51643d3aca72d9fd36c50..20129435f26b1423d046a5ffea07b1c1c2b226af 100644 --- a/paddle/fluid/extension/src/op_meta_info.cc +++ b/paddle/fluid/extension/src/op_meta_info.cc @@ -19,6 +19,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/custom_operator.h" +#include "paddle/fluid/platform/enforce.h" namespace paddle { @@ -62,11 +63,38 @@ OpMetaInfoMap::GetMap() const { //////////////// Op Meta Info Builder ///////////////// -OpMetaInfoBuilder::OpMetaInfoBuilder(std::string&& name) { +OpMetaInfoBuilder::OpMetaInfoBuilder(std::string&& name, size_t index) { + // 1. member assign name_ = std::forward(name); + index_ = index; + + // 2. check and meta info build auto& info_vector = OpMetaInfoMap::Instance()[name_]; + // index check + PADDLE_ENFORCE_EQ( + info_vector.size(), index_, + platform::errors::PreconditionNotMet( + "The operator %s's meta info register failed. 
" + "Please make sure you call marcos as order `PD_BUILD_OP`, " + "`PD_BUILD_GRAD_OP`, `PD_BUILD_DOUBLE_GRAD_OP`.", + name_)); + switch (index_) { + case 0: + break; + case 1: + name_ = name_ + "_grad"; + break; + case 2: + name_ = name_ + "_grad_grad"; + default: + PADDLE_THROW(platform::errors::InvalidArgument( + "Not support index `%d` when construct OpMetaInfoBuilder, " + "now only support `0, 1, 2`.", + index_)); + } auto op_meta = OpMetaInfo(name_); info_vector.emplace_back(std::move(op_meta)); + // 3. get current info ptr info_ptr_ = &(info_vector.back()); } @@ -93,24 +121,27 @@ OpMetaInfoBuilder& OpMetaInfoBuilder::SetKernelFn(KernelFunc func) { } OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferShapeFn(InferShapeFunc func) { + PADDLE_ENFORCE_EQ( + index_, 0UL, + platform::errors::Unimplemented( + "Currently, the InferShapeFn setting of Grad Op is not supported, " + "And backward Tensor `X@GRAD` will use the shape of forward Tensor " + "`X` by default.")); info_ptr_->SetInferShapeFn(std::forward(func)); return *this; } OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferDtypeFn(InferDtypeFunc func) { + PADDLE_ENFORCE_EQ( + index_, 0UL, + platform::errors::Unimplemented( + "Currently, the InferDtypeFn setting of Grad Op is not supported, " + "And backward Tensor `X@GRAD` will use the dtype of forward Tensor " + "`X` by default.")); info_ptr_->SetInferDtypeFn(std::forward(func)); return *this; } -OpMetaInfoBuilder& OpMetaInfoBuilder::SetBackwardOp( - const std::string& bwd_op_name) { - auto& info_vector = OpMetaInfoMap::Instance()[name_]; - auto op_meta = OpMetaInfo(bwd_op_name); - info_vector.emplace_back(std::move(op_meta)); - info_ptr_ = &(info_vector.back()); - return *this; -} - /////////////////////// Op register API ///////////////////////// void RegisterAllCustomOperator() { diff --git a/paddle/fluid/framework/custom_operator.cc b/paddle/fluid/framework/custom_operator.cc index 03a8cc366e7f2e8bb3baa2dd65ee609533cb8137..90831afc9ba89185dbe85dbf54bb38ea3ffbace6 
100644
--- a/paddle/fluid/framework/custom_operator.cc
+++ b/paddle/fluid/framework/custom_operator.cc
@@ -153,12 +153,21 @@
   }
 
   VLOG(1) << "Run ComputeFunc.";
-  auto outs = func(custom_ins, custom_attrs);
+  try {
+    auto outs = func(custom_ins, custom_attrs);
 
-  VLOG(1) << "Custom Operator: Share outputs into ExecutionContext.";
-  for (size_t i = 0; i < outputs.size(); ++i) {
-    auto* true_out = ctx.Output<Tensor>(outputs[i]);
-    CustomTensorUtils::ShareDataTo(outs.at(i), true_out);
+    VLOG(1) << "Custom Operator: Share outputs into ExecutionContext.";
+    for (size_t i = 0; i < outputs.size(); ++i) {
+      auto* true_out = ctx.Output<Tensor>(outputs[i]);
+      CustomTensorUtils::ShareDataTo(outs.at(i), true_out);
+    }
+  } catch (platform::EnforceNotMet& exception) {
+    throw std::move(exception);
+  } catch (std::exception& ex) {
+    PADDLE_THROW(platform::errors::External("%s", ex.what()));
+  } catch (...) {
+    PADDLE_THROW(platform::errors::Fatal(
+        "Custom operator raises an unknown exception in runtime."));
   }
 }
 
@@ -475,58 +484,108 @@
       op_name, info.proto_->InitializationErrorString()));
 
   // InferShape
-  PADDLE_ENFORCE_NOT_NULL(
-      infer_shape_func,
-      platform::errors::PreconditionNotMet(
-          "InferShapeFn is nullptr. 
Need to set the InferShapeFn of custom " - "operator by .SetInferShapeFn(PD_INFER_SHAPE(...))")); - info.infer_shape_ = [op_inputs, op_outputs, - infer_shape_func](InferShapeContext* ctx) { - std::vector> input_shapes; - - VLOG(1) << "Custom Operator: InferShape - get input ddim."; - for (auto& in_name : op_inputs) { - OP_INOUT_CHECK(ctx->HasInput(in_name), "Input", in_name, "Custom"); - auto ddim = ctx->GetInputDim(in_name); - input_shapes.emplace_back(framework::vectorize(ddim)); - } + if (infer_shape_func == nullptr) { + // use default InferShape + info.infer_shape_ = [op_inputs, op_outputs](InferShapeContext* ctx) { + PADDLE_ENFORCE_EQ( + op_inputs.size(), 1UL, + platform::errors::Unavailable( + "Your custom operator contains multiple inputs. " + "We only allow a custom operator that contains only one input " + "and " + "only one output without setting the InferShapeFn. At this time, " + "the input shape will be directly set to the output shape.\n" + "Please set the InferShapeFn of custom " + "operator by .SetInferShapeFn(PD_INFER_SHAPE(...))")); + PADDLE_ENFORCE_EQ( + op_outputs.size(), 1UL, + platform::errors::Unavailable( + "Your custom operator contains multiple outputs. " + "We only allow a custom operator that contains only one input " + "and " + "only one output without setting the InferShapeFn. 
At this time, " + "the input shape will be directly set to the output shape.\n" + "Please set the InferShapeFn of custom " + "operator by .SetInferShapeFn(PD_INFER_SHAPE(...))")); + + VLOG(1) << "Custom Operator: Default InferShape - share ddim."; + ctx->ShareDim(op_inputs[0], op_outputs[0]); + }; + } else { + info.infer_shape_ = [op_inputs, op_outputs, + infer_shape_func](InferShapeContext* ctx) { + std::vector> input_shapes; + + VLOG(1) << "Custom Operator: InferShape - get input ddim."; + for (auto& in_name : op_inputs) { + OP_INOUT_CHECK(ctx->HasInput(in_name), "Input", in_name, "Custom"); + auto ddim = ctx->GetInputDim(in_name); + input_shapes.emplace_back(framework::vectorize(ddim)); + } - VLOG(1) << "Custom Operator: InferShape - calc output ddim."; - auto output_shapes = infer_shape_func(input_shapes); + VLOG(1) << "Custom Operator: InferShape - calc output ddim."; + auto output_shapes = infer_shape_func(input_shapes); - VLOG(1) << "Custom Operator: InferShape - set output ddim."; - for (size_t i = 0; i < op_outputs.size(); ++i) { - ctx->SetOutputDim(op_outputs[i], framework::make_ddim(output_shapes[i])); - } - }; + VLOG(1) << "Custom Operator: InferShape - set output ddim."; + for (size_t i = 0; i < op_outputs.size(); ++i) { + ctx->SetOutputDim(op_outputs[i], + framework::make_ddim(output_shapes[i])); + } + }; + } // Infer Dtype - PADDLE_ENFORCE_NOT_NULL( - infer_dtype_func, - platform::errors::PreconditionNotMet( - "InferDtypeFn is nullptr. 
Need to set the InferDtypeFn of custom " - "operator by .SetInferDtypeFn(PD_INFER_DTYPE(...))")); - info.infer_var_type_ = [op_inputs, op_outputs, - infer_dtype_func](InferVarTypeContext* ctx) { - std::vector input_dtypes; - - VLOG(1) << "Custom Operator: InferDtype - get input dtype."; - for (auto& in_name : op_inputs) { - auto dtype = ctx->GetInputDataType(in_name); - input_dtypes.emplace_back( - CustomTensorUtils::ConvertInnerDTypeToEnumDType(dtype)); - } + if (infer_dtype_func == nullptr) { + // use defalut InferDtype + info.infer_var_type_ = [op_inputs, op_outputs](InferVarTypeContext* ctx) { + PADDLE_ENFORCE_EQ( + op_inputs.size(), 1UL, + platform::errors::Unavailable( + "Your custom operator contains multiple inputs. " + "We only allow a custom operator that contains only one input " + "and " + "only one output without setting the InferDtypeFn. At this time, " + "the input dtype will be directly set to the output dtype.\n" + "Please set the InferDtypeFn of custom " + "operator by .SetInferDtypeFn(PD_INFER_DTYPE(...))")); + PADDLE_ENFORCE_EQ( + op_outputs.size(), 1UL, + platform::errors::Unavailable( + "Your custom operator contains multiple outputs. " + "We only allow a custom operator that contains only one input " + "and " + "only one output without setting the InferDtypeFn. 
At this time, " + "the input dtype will be directly set to the output dtype.\n" + "Please set the InferDtypeFn of custom " + "operator by .SetInferDtypeFn(PD_INFER_DTYPE(...))")); + + VLOG(1) << "Custom Operator: InferDtype - share dtype."; + auto dtype = ctx->GetInputDataType(op_inputs[0]); + ctx->SetOutputDataType(op_outputs[0], dtype); + }; + } else { + info.infer_var_type_ = [op_inputs, op_outputs, + infer_dtype_func](InferVarTypeContext* ctx) { + std::vector input_dtypes; + + VLOG(1) << "Custom Operator: InferDtype - get input dtype."; + for (auto& in_name : op_inputs) { + auto dtype = ctx->GetInputDataType(in_name); + input_dtypes.emplace_back( + CustomTensorUtils::ConvertInnerDTypeToEnumDType(dtype)); + } - VLOG(1) << "Custom Operator: InferDtype - infer output dtype."; - auto output_dtypes = infer_dtype_func(input_dtypes); + VLOG(1) << "Custom Operator: InferDtype - infer output dtype."; + auto output_dtypes = infer_dtype_func(input_dtypes); - VLOG(1) << "Custom Operator: InferDtype - set output dtype."; - for (size_t i = 0; i < op_outputs.size(); ++i) { - ctx->SetOutputDataType( - op_outputs[i], - CustomTensorUtils::ConvertEnumDTypeToInnerDType(output_dtypes[i])); - } - }; + VLOG(1) << "Custom Operator: InferDtype - set output dtype."; + for (size_t i = 0; i < op_outputs.size(); ++i) { + ctx->SetOutputDataType( + op_outputs[i], + CustomTensorUtils::ConvertEnumDTypeToInnerDType(output_dtypes[i])); + } + }; + } // Kernel func RegisterOperatorKernel(op_name, kernel_fn, op_inputs, op_outputs, op_attrs); diff --git a/python/paddle/fluid/tests/custom_op/CMakeLists.txt b/python/paddle/fluid/tests/custom_op/CMakeLists.txt index 3f85f4ef50a223949ef60678b61e97be29aea471..7f94da43535589d75a0e2312c351c13c16f27146 100644 --- a/python/paddle/fluid/tests/custom_op/CMakeLists.txt +++ b/python/paddle/fluid/tests/custom_op/CMakeLists.txt @@ -3,10 +3,12 @@ if(WITH_GPU) # 'test_custom_relu_op_setup/jit' compile .cc and .cu file py_test(test_custom_relu_op_setup SRCS 
test_custom_relu_op_setup.py) py_test(test_custom_relu_op_jit SRCS test_custom_relu_op_jit.py) + py_test(test_custom_relu_model SRCS test_custom_relu_model.py) # Compiling shared library will cost some time, but running process is very fast. set_tests_properties(test_custom_relu_op_setup PROPERTIES TIMEOUT 250) set_tests_properties(test_custom_relu_op_jit PROPERTIES TIMEOUT 180) + set_tests_properties(test_custom_relu_model PROPERTIES TIMEOUT 180) endif() py_test(test_sysconfig SRCS test_sysconfig.py) diff --git a/python/paddle/fluid/tests/custom_op/attr_test_op.cc b/python/paddle/fluid/tests/custom_op/attr_test_op.cc index 474d3d2d4e2b3b566620a11d41564fb662bd35e3..97aae10613734948e65a76b6854bfffe9bed45a7 100644 --- a/python/paddle/fluid/tests/custom_op/attr_test_op.cc +++ b/python/paddle/fluid/tests/custom_op/attr_test_op.cc @@ -150,15 +150,7 @@ std::vector AttrTestBackward( return {grad_x}; } -std::vector> InferShape(std::vector x_shape) { - return {x_shape}; -} - -std::vector InferDType(paddle::DataType x_dtype) { - return {x_dtype}; -} - -PD_BUILD_OP("attr_test") +PD_BUILD_OP(attr_test) .Inputs({"X"}) .Outputs({"Out"}) .Attrs({"bool_attr: bool", @@ -170,10 +162,9 @@ PD_BUILD_OP("attr_test") "float_vec_attr: std::vector", "int64_vec_attr: std::vector", "str_vec_attr: std::vector"}) - .SetKernelFn(PD_KERNEL(AttrTestForward)) - .SetInferShapeFn(PD_INFER_SHAPE(InferShape)) - .SetInferDtypeFn(PD_INFER_DTYPE(InferDType)) - .SetBackwardOp("attr_test_grad") + .SetKernelFn(PD_KERNEL(AttrTestForward)); + +PD_BUILD_GRAD_OP(attr_test) .Inputs({paddle::Grad("Out")}) .Outputs({paddle::Grad("X")}) .Attrs({"int_attr: int", diff --git a/python/paddle/fluid/tests/custom_op/custom_relu_op.cc b/python/paddle/fluid/tests/custom_op/custom_relu_op.cc index 0e358e24ae3e814b3fd21d010c478812aa0b8340..4b8d3bca63695c0abfb5dafc0423e292fd157c07 100644 --- a/python/paddle/fluid/tests/custom_op/custom_relu_op.cc +++ b/python/paddle/fluid/tests/custom_op/custom_relu_op.cc @@ -96,21 +96,12 @@ 
std::vector ReluBackward(const paddle::Tensor& x, } } -std::vector> ReluInferShape(std::vector x_shape) { - return {x_shape}; -} - -std::vector ReluInferDType(paddle::DataType x_dtype) { - return {x_dtype}; -} - -PD_BUILD_OP("custom_relu") +PD_BUILD_OP(custom_relu) .Inputs({"X"}) .Outputs({"Out"}) - .SetKernelFn(PD_KERNEL(ReluForward)) - .SetInferShapeFn(PD_INFER_SHAPE(ReluInferShape)) - .SetInferDtypeFn(PD_INFER_DTYPE(ReluInferDType)) - .SetBackwardOp("relu2_grad") + .SetKernelFn(PD_KERNEL(ReluForward)); + +PD_BUILD_GRAD_OP(custom_relu) .Inputs({"X", "Out", paddle::Grad("Out")}) .Outputs({paddle::Grad("X")}) .SetKernelFn(PD_KERNEL(ReluBackward)); diff --git a/python/paddle/fluid/tests/custom_op/custom_relu_op_dup.cc b/python/paddle/fluid/tests/custom_op/custom_relu_op_dup.cc index 7319bdd76264508ef485ba80382aac6dcbbeb4b6..89d14bfa049603dd7a97b6d374dfa44c227728e2 100644 --- a/python/paddle/fluid/tests/custom_op/custom_relu_op_dup.cc +++ b/python/paddle/fluid/tests/custom_op/custom_relu_op_dup.cc @@ -25,19 +25,14 @@ std::vector ReluBackward(const paddle::Tensor& x, const paddle::Tensor& out, const paddle::Tensor& grad_out); -std::vector> ReluInferShape(std::vector x_shape); - -std::vector ReluInferDType(paddle::DataType x_dtype); - // Reuse codes in `custom_relu_op.cc/cu` to register another custom operator // to test jointly compile multi operators at same time. 
-PD_BUILD_OP("custom_relu_dup") +PD_BUILD_OP(custom_relu_dup) .Inputs({"X"}) .Outputs({"Out"}) - .SetKernelFn(PD_KERNEL(ReluForward)) - .SetInferShapeFn(PD_INFER_SHAPE(ReluInferShape)) - .SetInferDtypeFn(PD_INFER_DTYPE(ReluInferDType)) - .SetBackwardOp("relu3_grad") + .SetKernelFn(PD_KERNEL(ReluForward)); + +PD_BUILD_GRAD_OP(custom_relu_dup) .Inputs({"X", "Out", paddle::Grad("Out")}) .Outputs({paddle::Grad("X")}) .SetKernelFn(PD_KERNEL(ReluBackward)); diff --git a/python/paddle/fluid/tests/custom_op/dispatch_test_op.cc b/python/paddle/fluid/tests/custom_op/dispatch_test_op.cc index e09ac2f87c80639ac92467e7b50094e7752d500f..720be8b4e377b555ba202fb4f81ee3f7be54e265 100644 --- a/python/paddle/fluid/tests/custom_op/dispatch_test_op.cc +++ b/python/paddle/fluid/tests/custom_op/dispatch_test_op.cc @@ -26,14 +26,6 @@ void assign_cpu_kernel(const data_t* x_data, } } -std::vector> InferShape(std::vector x_shape) { - return {x_shape}; -} - -std::vector InferDType(paddle::DataType x_dtype) { - return {x_dtype}; -} - std::vector DispatchTestInterger(const paddle::Tensor& x) { auto out = paddle::Tensor(paddle::PlaceType::kCPU); out.reshape(x.shape()); @@ -47,12 +39,10 @@ std::vector DispatchTestInterger(const paddle::Tensor& x) { return {out}; } -PD_BUILD_OP("dispatch_test_integer") +PD_BUILD_OP(dispatch_test_integer) .Inputs({"X"}) .Outputs({"Out"}) - .SetKernelFn(PD_KERNEL(DispatchTestInterger)) - .SetInferShapeFn(PD_INFER_SHAPE(InferShape)) - .SetInferDtypeFn(PD_INFER_DTYPE(InferDType)); + .SetKernelFn(PD_KERNEL(DispatchTestInterger)); std::vector DispatchTestComplex(const paddle::Tensor& x) { auto out = paddle::Tensor(paddle::PlaceType::kCPU); @@ -67,12 +57,10 @@ std::vector DispatchTestComplex(const paddle::Tensor& x) { return {out}; } -PD_BUILD_OP("dispatch_test_complex") +PD_BUILD_OP(dispatch_test_complex) .Inputs({"X"}) .Outputs({"Out"}) - .SetKernelFn(PD_KERNEL(DispatchTestComplex)) - .SetInferShapeFn(PD_INFER_SHAPE(InferShape)) - 
.SetInferDtypeFn(PD_INFER_DTYPE(InferDType)); + .SetKernelFn(PD_KERNEL(DispatchTestComplex)); std::vector DispatchTestFloatAndInteger( const paddle::Tensor& x) { @@ -88,12 +76,10 @@ std::vector DispatchTestFloatAndInteger( return {out}; } -PD_BUILD_OP("dispatch_test_float_and_integer") +PD_BUILD_OP(dispatch_test_float_and_integer) .Inputs({"X"}) .Outputs({"Out"}) - .SetKernelFn(PD_KERNEL(DispatchTestFloatAndInteger)) - .SetInferShapeFn(PD_INFER_SHAPE(InferShape)) - .SetInferDtypeFn(PD_INFER_DTYPE(InferDType)); + .SetKernelFn(PD_KERNEL(DispatchTestFloatAndInteger)); std::vector DispatchTestFloatAndComplex( const paddle::Tensor& x) { @@ -109,12 +95,10 @@ std::vector DispatchTestFloatAndComplex( return {out}; } -PD_BUILD_OP("dispatch_test_float_and_complex") +PD_BUILD_OP(dispatch_test_float_and_complex) .Inputs({"X"}) .Outputs({"Out"}) - .SetKernelFn(PD_KERNEL(DispatchTestFloatAndComplex)) - .SetInferShapeFn(PD_INFER_SHAPE(InferShape)) - .SetInferDtypeFn(PD_INFER_DTYPE(InferDType)); + .SetKernelFn(PD_KERNEL(DispatchTestFloatAndComplex)); std::vector DispatchTestFloatAndIntegerAndComplex( const paddle::Tensor& x) { @@ -130,9 +114,7 @@ std::vector DispatchTestFloatAndIntegerAndComplex( return {out}; } -PD_BUILD_OP("dispatch_test_float_and_integer_and_complex") +PD_BUILD_OP(dispatch_test_float_and_integer_and_complex) .Inputs({"X"}) .Outputs({"Out"}) - .SetKernelFn(PD_KERNEL(DispatchTestFloatAndIntegerAndComplex)) - .SetInferShapeFn(PD_INFER_SHAPE(InferShape)) - .SetInferDtypeFn(PD_INFER_DTYPE(InferDType)); + .SetKernelFn(PD_KERNEL(DispatchTestFloatAndIntegerAndComplex)); diff --git a/python/paddle/fluid/tests/custom_op/multi_out_test_op.cc b/python/paddle/fluid/tests/custom_op/multi_out_test_op.cc index bece0f49845a5ae3fd006ccf383adb78f043bd4b..17a36df2cde48598efb3945499516c6c70edecfd 100644 --- a/python/paddle/fluid/tests/custom_op/multi_out_test_op.cc +++ b/python/paddle/fluid/tests/custom_op/multi_out_test_op.cc @@ -68,7 +68,7 @@ std::vector 
InferDtype(paddle::DataType x_dtype) { return {x_dtype, paddle::DataType::FLOAT64, paddle::DataType::INT32}; } -PD_BUILD_OP("multi_out") +PD_BUILD_OP(multi_out) .Inputs({"X"}) .Outputs({"Out", "Fake_float64", "ZFake_int32"}) .SetKernelFn(PD_KERNEL(MultiOutCPU)) diff --git a/python/paddle/fluid/tests/custom_op/test_custom_relu_model.py b/python/paddle/fluid/tests/custom_op/test_custom_relu_model.py new file mode 100644 index 0000000000000000000000000000000000000000..205204168859ad39b59fcaca6524f98a3d09330c --- /dev/null +++ b/python/paddle/fluid/tests/custom_op/test_custom_relu_model.py @@ -0,0 +1,318 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import unittest +import numpy as np + +import paddle +from paddle import nn +from paddle.utils.cpp_extension import load, get_build_directory +from paddle.utils.cpp_extension.extension_utils import run_cmd + +from utils import paddle_includes, extra_compile_args + +# Because Windows don't use docker, the shared lib already exists in the +# cache dir, it will not be compiled again unless the shared lib is removed. +file = '{}\\custom_relu_for_model_jit\\custom_relu_for_model_jit.pyd'.format( + get_build_directory()) +if os.name == 'nt' and os.path.isfile(file): + cmd = 'del {}'.format(file) + run_cmd(cmd, True) + +# Compile and load custom op Just-In-Time. 
+# custom_relu_op_dup.cc is only used for multi ops test, +# not a new op, if you want to test only one op, remove this +# source file +custom_module = load( + name='custom_relu_for_model_jit', + sources=['custom_relu_op.cc', 'custom_relu_op.cu'], + extra_include_paths=paddle_includes, # add for Coverage CI + extra_cxx_cflags=extra_compile_args, # add for Coverage CI + extra_cuda_cflags=extra_compile_args, # add for Coverage CI + verbose=True) + + +class Net(nn.Layer): + """ + A simple exmaple for Regression Model. + """ + + def __init__(self, in_dim, out_dim, use_custom_op=False): + super(Net, self).__init__() + self.fc1 = nn.Linear(in_dim, in_dim) + self.fc2 = nn.Linear(in_dim, out_dim) + self.relu_act = custom_module.custom_relu if use_custom_op else nn.functional.relu + + def forward(self, x): + out = self.fc1(x) + out = self.relu_act(out) + out = self.fc2(out) + out = self.relu_act(out) + + out = paddle.mean(out, axis=-1) + + return out + + +class TestDygraphModel(unittest.TestCase): + def setUp(self): + + self.seed = 2021 + self.in_dim = 10 + self.out_dim = 64 + self.batch_num = 10 + self.batch_size = 4 + self.datas = [ + np.random.uniform( + size=[self.batch_size, self.in_dim]).astype('float32') + for i in range(self.batch_num) + ] + self.labels = [ + np.random.uniform(size=[self.batch_size, 1]).astype('float32') + for i in range(self.batch_num) + ] + + self.devices = ['cpu', 'gpu'] + + # for saving model + self.model_path_template = "infer_model/custom_relu_dygaph_model_{}.pdparams" + self.model_dy2stat_path = "infer_model/custom_relu_model_dy2sta" + + # for dy2stat + self.x_spec = paddle.static.InputSpec( + shape=[None, self.in_dim], dtype='float32', name='x') + + def test_train_eval(self): + for device in self.devices: + # set device + paddle.set_device(device) + + # for train + origin_relu_train_out = self.train_model(use_custom_op=False) + custom_relu_train_out = self.train_model(use_custom_op=True) + custom_relu_dy2stat_train_out = self.train_model( + 
use_custom_op=True, dy2stat=True) # for to_static + + self.assertTrue( + np.array_equal(origin_relu_train_out, custom_relu_train_out)) + self.assertTrue( + np.array_equal(origin_relu_train_out, + custom_relu_dy2stat_train_out)) + + # for eval + origin_relu_eval_out = self.eval_model(use_custom_op=False) + custom_relu_eval_out = self.eval_model(use_custom_op=True) + custom_relu_dy2stat_eval_out = self.eval_model( + use_custom_op=True, dy2stat=True) # for to_static + + self.assertTrue( + np.array_equal(origin_relu_eval_out, custom_relu_eval_out)) + self.assertTrue( + np.array_equal(origin_relu_eval_out, + custom_relu_dy2stat_eval_out)) + + def train_model(self, use_custom_op=False, dy2stat=False): + # reset random seed + paddle.seed(self.seed) + np.random.seed(self.seed) + # paddle.framework.random._manual_program_seed(SEED) + + net = Net(self.in_dim, self.out_dim, use_custom_op) + if dy2stat: + net = paddle.jit.to_static(net, input_spec=[self.x_spec]) + mse_loss = paddle.nn.MSELoss() + sgd = paddle.optimizer.SGD(learning_rate=0.1, + parameters=net.parameters()) + + for batch_id in range(self.batch_num): + x = paddle.to_tensor(self.datas[batch_id]) + y = paddle.to_tensor(self.labels[batch_id]) + + out = net(x) + loss = mse_loss(out, y) + + loss.backward() + sgd.minimize(loss) + net.clear_gradients() + + # save inference model + net.eval() + if dy2stat: + paddle.jit.save(net, self.model_dy2stat_path) + else: + paddle.save(net.state_dict(), + self.model_path_template.format(use_custom_op)) + + return out.numpy() + + def eval_model(self, use_custom_op=False, dy2stat=False): + net = Net(self.in_dim, self.out_dim, use_custom_op) + + if dy2stat: + net = paddle.jit.load(self.model_dy2stat_path) + else: + state_dict = paddle.load( + self.model_path_template.format(use_custom_op)) + net.set_state_dict(state_dict) + + sample_x = paddle.to_tensor(self.datas[0]) + net.eval() + out = net(sample_x) + + return out.numpy() + + +class TestStaticModel(unittest.TestCase): + def 
setUp(self): + self.seed = 2021 + self.in_dim = 10 + self.out_dim = 64 + self.batch_num = 10 + self.batch_size = 8 + self.datas = [ + np.random.uniform( + size=[self.batch_size, self.in_dim]).astype('float32') + for i in range(self.batch_num) + ] + self.labels = [ + np.random.uniform(size=[self.batch_size, 1]).astype('float32') + for i in range(self.batch_num) + ] + + self.devices = ['cpu', 'gpu'] + + # for saving model + self.model_path_template = "infer_model/custom_relu_static_model_{}_{}" + + paddle.enable_static() + + def tearDown(self): + paddle.disable_static() + + def test_train_eval(self): + for device in self.devices: + # for train + original_relu_train_out = self.train_model( + device, use_custom_op=False) + custom_relu_train_out = self.train_model(device, use_custom_op=True) + # using PE + original_relu_train_pe_out = self.train_model( + device, use_custom_op=False, use_pe=True) + custom_relu_train_pe_out = self.train_model( + device, use_custom_op=True, use_pe=True) + print(original_relu_train_out) + print(custom_relu_train_out) + print(original_relu_train_pe_out) + print(custom_relu_train_pe_out) + + self.assertTrue( + np.array_equal(original_relu_train_out, custom_relu_train_out)) + self.assertTrue( + np.array_equal(original_relu_train_pe_out, + custom_relu_train_pe_out)) + + # for eval + original_relu_eval_out = self.eval_model( + device, use_custom_op=False) + custom_relu_eval_out = self.eval_model(device, use_custom_op=True) + # using PE + original_relu_eval_pe_out = self.eval_model( + device, use_custom_op=False, use_pe=True) + custom_relu_eval_pe_out = self.eval_model( + device, use_custom_op=True, use_pe=True) + print(original_relu_eval_out) + print(custom_relu_eval_out) + print(original_relu_eval_pe_out) + print(custom_relu_eval_pe_out) + + self.assertTrue( + np.array_equal(original_relu_eval_out, custom_relu_eval_out)) + self.assertTrue( + np.array_equal(original_relu_eval_pe_out, + custom_relu_eval_pe_out)) + + def train_model(self, device, 
use_custom_op=False, use_pe=False):
+        # reset random seed
+        paddle.seed(self.seed)
+        np.random.seed(self.seed)
+        # set device
+        paddle.set_device(device)
+
+        with paddle.static.scope_guard(paddle.static.Scope()):
+            with paddle.static.program_guard(paddle.static.Program()):
+                x = paddle.static.data(
+                    shape=[None, self.in_dim], name='x', dtype='float32')
+                y = paddle.static.data(
+                    shape=[None, 1], name='y', dtype='float32')
+
+                net = Net(self.in_dim, self.out_dim, use_custom_op)
+                out = net(x)
+
+                loss = nn.functional.mse_loss(out, y)
+                sgd = paddle.optimizer.SGD(learning_rate=0.01)
+                sgd.minimize(loss)
+
+                exe = paddle.static.Executor()
+                exe.run(paddle.static.default_startup_program())
+
+                # For PE
+                if use_pe:
+                    places = paddle.static.cpu_places(
+                    ) if device == 'cpu' else paddle.static.cuda_places()
+                    main_program = paddle.static.CompiledProgram(
+                        paddle.static.default_main_program(
+                        )).with_data_parallel(
+                            loss_name=loss.name, places=places)
+                else:
+                    main_program = paddle.static.default_main_program()
+
+                for batch_id in range(self.batch_num):
+                    x_data = self.datas[batch_id]
+                    y_data = self.labels[batch_id]
+
+                    res = exe.run(main_program,
+                                  feed={'x': x_data,
+                                        'y': y_data},
+                                  fetch_list=[out])
+
+                # save model
+                paddle.static.save_inference_model(
+                    self.model_path_template.format(use_custom_op, use_pe),
+                    [x], [out], exe)
+
+        return res[0]
+
+    def eval_model(self, device, use_custom_op=False, use_pe=False):
+        paddle.set_device(device)
+
+        with paddle.static.scope_guard(paddle.static.Scope()):
+            with paddle.static.program_guard(paddle.static.Program()):
+                exe = paddle.static.Executor()
+
+                [inference_program, feed_target_names,
+                 fetch_targets] = paddle.static.load_inference_model(
+                     self.model_path_template.format(use_custom_op, use_pe),
+                     exe)
+
+                x_data = self.datas[0]
+                results = exe.run(inference_program,
+                                  feed={feed_target_names[0]: x_data},
+                                  fetch_list=fetch_targets)
+
+        return results[0]
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/utils/cpp_extension/cpp_extension.py b/python/paddle/utils/cpp_extension/cpp_extension.py
index 57bcea658b53c400234afd22d4d5acc77f7f43ce..5d132217bba91f84924cbff9f2bd951d381326cf 100644
--- a/python/paddle/utils/cpp_extension/cpp_extension.py
+++ b/python/paddle/utils/cpp_extension/cpp_extension.py
@@ -22,11 +22,14 @@ from setuptools.command.easy_install import easy_install
 from setuptools.command.build_ext import build_ext
 from distutils.command.build import build
 
-from .extension_utils import find_cuda_home, normalize_extension_kwargs, add_compile_flag, bootstrap_context
-from .extension_utils import is_cuda_file, prepare_unix_cudaflags, prepare_win_cudaflags, add_std_without_repeat, get_build_directory
-from .extension_utils import _import_module_from_library, CustomOpInfo, _write_setup_file, _jit_compile, parse_op_name_from
-from .extension_utils import check_abi_compatibility, log_v, IS_WINDOWS, OS_NAME
-from .extension_utils import use_new_custom_op_load_method, MSVC_COMPILE_FLAGS
+from .extension_utils import find_cuda_home, normalize_extension_kwargs, add_compile_flag
+from .extension_utils import is_cuda_file, prepare_unix_cudaflags, prepare_win_cudaflags
+from .extension_utils import _import_module_from_library, _write_setup_file, _jit_compile
+from .extension_utils import check_abi_compatibility, log_v, CustomOpInfo, parse_op_name_from
+from .extension_utils import use_new_custom_op_load_method, clean_object_if_change_cflags
+from .extension_utils import bootstrap_context, get_build_directory, add_std_without_repeat
+
+from .extension_utils import IS_WINDOWS, OS_NAME, MSVC_COMPILE_FLAGS
 
 # Note(zhouwei): On windows, it will export function 'PyInit_[name]' by default,
 # The solution is: 1.User add function PyInit_[name] 2. 
set not to export @@ -357,6 +360,13 @@ class BuildExtension(build_ext, object): def build_extensions(self): self._check_abi() + # Note(Aurelius84): If already compiling source before, we should check whether + # cflags have changed and delete the built shared library to re-compile the source + # even though source file content keeps unchanged. + so_name = self.get_ext_fullpath(self.extensions[0].name) + clean_object_if_change_cflags( + os.path.abspath(so_name), self.extensions[0]) + # Consider .cu, .cu.cc as valid source extensions. self.compiler.src_extensions += ['.cu', '.cu.cc'] # Save the original _compile method for later. diff --git a/python/paddle/utils/cpp_extension/extension_utils.py b/python/paddle/utils/cpp_extension/extension_utils.py index db2da5574854c27267dd568d4eed1432acd5353f..df953763a66ed34e78952e4a22f55a8d93c5c2fc 100644 --- a/python/paddle/utils/cpp_extension/extension_utils.py +++ b/python/paddle/utils/cpp_extension/extension_utils.py @@ -16,7 +16,9 @@ import os import re import six import sys +import json import glob +import hashlib import logging import collections import textwrap @@ -219,6 +221,106 @@ class CustomOpInfo: return next(reversed(self.op_info_map.items())) +VersionFields = collections.namedtuple('VersionFields', [ + 'sources', + 'extra_compile_args', + 'extra_link_args', + 'library_dirs', + 'runtime_library_dirs', + 'include_dirs', + 'define_macros', + 'undef_macros', +]) + + +class VersionManager: + def __init__(self, version_field): + self.version_field = version_field + self.version = self.hasher(version_field) + + def hasher(self, version_field): + from paddle.fluid.layers.utils import flatten + + md5 = hashlib.md5() + for field in version_field._fields: + elem = getattr(version_field, field) + if not elem: continue + if isinstance(elem, (list, tuple, dict)): + flat_elem = flatten(elem) + md5 = combine_hash(md5, tuple(flat_elem)) + else: + raise RuntimeError( + "Support types with list, tuple and dict, but received {} with
{}.". + format(type(elem), elem)) + + return md5.hexdigest() + + @property + def details(self): + return self.version_field._asdict() + + +def combine_hash(md5, value): + """ + Return new hash value. + DO NOT use `hash()` because it doesn't generate stable value between different processes. + See https://stackoverflow.com/questions/27522626/hash-function-in-python-3-3-returns-different-results-between-sessions + """ + md5.update(repr(value).encode()) + return md5 + + +def clean_object_if_change_cflags(so_path, extension): + """ + If already compiling source before, we should check whether cflags + have changed and delete the built object to re-compile the source + even though source file content keeps unchanged. + """ + + def serialize(path, version_info): + assert isinstance(version_info, dict) + with open(path, 'w') as f: + f.write(json.dumps(version_info, indent=4, sort_keys=True)) + + def deserialize(path): + assert os.path.exists(path) + with open(path, 'r') as f: + content = f.read() + return json.loads(content) + + # version file + VERSION_FILE = "version.txt" + base_dir = os.path.dirname(so_path) + so_name = os.path.basename(so_path) + version_file = os.path.join(base_dir, VERSION_FILE) + + # version info + args = [getattr(extension, field, None) for field in VersionFields._fields] + version_field = VersionFields._make(args) + versioner = VersionManager(version_field) + + if os.path.exists(so_path) and os.path.exists(version_file): + old_version_info = deserialize(version_file) + so_version = old_version_info.get(so_name, None) + # delete shared library file if version is changed to re-compile it. + if so_version is not None and so_version != versioner.version: + log_v( + "Re-Compiling {}, because specified cflags have been changed. New signature {} has been saved into {}.".
+ format(so_name, versioner.version, version_file)) + os.remove(so_path) + # update new version information + new_version_info = versioner.details + new_version_info[so_name] = versioner.version + serialize(version_file, new_version_info) + else: + # If compiling for the first time, save compiling detail information for debug. + if not os.path.exists(base_dir): + os.makedirs(base_dir) + details = versioner.details + details[so_name] = versioner.version + serialize(version_file, details) + + def prepare_unix_cudaflags(cflags): """ Prepare all necessary compiled flags for nvcc compiling CUDA files.