Unverified commit 52f7e773 authored by Aurelius84, committed by GitHub

[Cherry-Pick] Split Macros and Add modeling unittest (#31266)

* [CustomOp] Add Modeling with Custom op unittest (#31218)

* add unittest for static/dygraph/dy2stat

* add PE unittest

* remove useless code

* add unittest in CMakeLists.txt

* [CustomOp] Split build op macro & polish details (#31229)

* split build op macro & polish details

* revert register api del

* fix other unittest

* [CustomOp] Support Incremental compilation and Add Version management (#31228)

* Support Incremental compilation and Add Version management

* replace hash with hashlib

* fix test_op_num unittest

* Revert "fix test_op_num unittest"

This reverts commit 2f78de976e1d7ca60915b2310717b38a32ae204a.
Co-authored-by: Chen Weihang <chenweihang@baidu.com>
Parent 536d9a3b
@@ -38,6 +38,8 @@ class PD_DLL_DECL OpMetaInfoHelper;
using Tensor = paddle::Tensor;
///////////////// Util Macro Define ////////////////
#define PD_DISABLE_COPY_AND_ASSIGN(classname) \
private: \
classname(const classname&) = delete; \
@@ -65,6 +67,12 @@ using Tensor = paddle::Tensor;
END_HANDLE_THE_ERROR \
} while (0)
#define STATIC_ASSERT_GLOBAL_NAMESPACE(uniq_name, msg) \
struct __test_global_namespace_##uniq_name##__ {}; \
static_assert(std::is_same<::__test_global_namespace_##uniq_name##__, \
__test_global_namespace_##uniq_name##__>::value, \
msg)
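A minimal sketch of what this guard enforces (the name `demo` is hypothetical): the lookup of `::__test_global_namespace_demo__` only succeeds when the macro is expanded at global scope, so misuse fails at compile time.

```cpp
STATIC_ASSERT_GLOBAL_NAMESPACE(demo, "must be used in global namespace");  // OK at file scope

namespace foo {
// STATIC_ASSERT_GLOBAL_NAMESPACE(demo, "...");  // would fail to compile here
}  // namespace foo
```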
///////////////// Util Define and Function ////////////////
inline std::string Grad(const std::string& var_name) {
@@ -288,9 +296,9 @@ class PD_DLL_DECL OpMetaInfo {
std::vector<std::string> attrs_;
// 2. func info
KernelFunc kernel_fn_;
InferShapeFunc infer_shape_fn_;
InferDtypeFunc infer_dtype_fn_;
KernelFunc kernel_fn_{nullptr};
InferShapeFunc infer_shape_fn_{nullptr};
InferDtypeFunc infer_dtype_fn_{nullptr};
};
//////////////// Op Meta Info Map /////////////////
@@ -321,20 +329,22 @@ class PD_DLL_DECL OpMetaInfoMap {
class PD_DLL_DECL OpMetaInfoBuilder {
public:
explicit OpMetaInfoBuilder(std::string&& name);
explicit OpMetaInfoBuilder(std::string&& name, size_t index);
OpMetaInfoBuilder& Inputs(std::vector<std::string>&& inputs);
OpMetaInfoBuilder& Outputs(std::vector<std::string>&& outputs);
OpMetaInfoBuilder& Attrs(std::vector<std::string>&& attrs);
OpMetaInfoBuilder& SetKernelFn(KernelFunc func);
OpMetaInfoBuilder& SetInferShapeFn(InferShapeFunc func);
OpMetaInfoBuilder& SetInferDtypeFn(InferDtypeFunc func);
OpMetaInfoBuilder& SetBackwardOp(const std::string& bwd_op_name);
private:
// Forward Op name
std::string name_;
// Point to the currently constructed op meta info
// ref current info ptr
OpMetaInfo* info_ptr_;
// The current op meta info index in vector
// - 0: op, 1: grad_op, 2: grad_grad_op
size_t index_;
};
/////////////////////// Op register API /////////////////////////
@@ -350,14 +360,25 @@ void LoadCustomOperatorLib(const std::string& dso_name);
/////////////////////// Op register Macro /////////////////////////
#define PD_BUILD_OP_WITH_COUNTER(op_name, counter) \
static ::paddle::OpMetaInfoBuilder __op_meta_info_##counter##__ = \
::paddle::OpMetaInfoBuilder(op_name)
#define PD_BUILD_OP_INNER(op_name, counter) \
PD_BUILD_OP_WITH_COUNTER(op_name, counter)
#define PD_BUILD_OP(op_name) PD_BUILD_OP_INNER(op_name, __COUNTER__)
#define PD_BUILD_OP(op_name) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \
__reg_op__##op_name, "PD_BUILD_OP must be called in global namespace."); \
static ::paddle::OpMetaInfoBuilder __op_meta_info_##op_name##__ = \
::paddle::OpMetaInfoBuilder(#op_name, 0)
#define PD_BUILD_GRAD_OP(op_name) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \
__reg_grad_op__##op_name, \
"PD_BUILD_GRAD_OP must be called in global namespace."); \
static ::paddle::OpMetaInfoBuilder __grad_op_meta_info_##op_name##__ = \
::paddle::OpMetaInfoBuilder(#op_name, 1)
#define PD_BUILD_DOUBLE_GRAD_OP(op_name) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \
__reg_grad_grad_op__##op_name, \
"PD_BUILD_DOUBLE_GRAD_OP must be called in global namespace."); \
static ::paddle::OpMetaInfoBuilder __grad_grad_op_meta_info_##op_name##__ = \
::paddle::OpMetaInfoBuilder(#op_name, 2)
} // namespace paddle
......
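For context, a minimal usage sketch of the split macros (the op name `my_relu` and the kernel functions are hypothetical; the `custom_relu` tests below follow the same pattern, and `paddle/extension.h` is the usual user-facing header):

```cpp
#include "paddle/extension.h"

std::vector<paddle::Tensor> MyReluForward(const paddle::Tensor& x);
std::vector<paddle::Tensor> MyReluBackward(const paddle::Tensor& x,
                                           const paddle::Tensor& out,
                                           const paddle::Tensor& grad_out);

// The macros must appear in this order: forward op first (index 0), then its
// grad op (index 1); PD_BUILD_DOUBLE_GRAD_OP (index 2) is optional.
PD_BUILD_OP(my_relu)
    .Inputs({"X"})
    .Outputs({"Out"})
    .SetKernelFn(PD_KERNEL(MyReluForward));

PD_BUILD_GRAD_OP(my_relu)
    .Inputs({"X", "Out", paddle::Grad("Out")})
    .Outputs({paddle::Grad("X")})
    .SetKernelFn(PD_KERNEL(MyReluBackward));
```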
@@ -19,6 +19,7 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/framework/custom_operator.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
@@ -62,11 +63,38 @@ OpMetaInfoMap::GetMap() const {
//////////////// Op Meta Info Builder /////////////////
OpMetaInfoBuilder::OpMetaInfoBuilder(std::string&& name) {
OpMetaInfoBuilder::OpMetaInfoBuilder(std::string&& name, size_t index) {
// 1. member assign
name_ = std::forward<std::string>(name);
index_ = index;
// 2. check index and build meta info
auto& info_vector = OpMetaInfoMap::Instance()[name_];
// index check
PADDLE_ENFORCE_EQ(
info_vector.size(), index_,
platform::errors::PreconditionNotMet(
"The operator %s's meta info register failed. "
"Please make sure you call marcos as order `PD_BUILD_OP`, "
"`PD_BUILD_GRAD_OP`, `PD_BUILD_DOUBLE_GRAD_OP`.",
name_));
switch (index_) {
case 0:
break;
case 1:
name_ = name_ + "_grad";
break;
case 2:
name_ = name_ + "_grad_grad";
break;
default:
PADDLE_THROW(platform::errors::InvalidArgument(
"Not support index `%d` when construct OpMetaInfoBuilder, "
"now only support `0, 1, 2`.",
index_));
}
auto op_meta = OpMetaInfo(name_);
info_vector.emplace_back(std::move(op_meta));
// 3. get current info ptr
info_ptr_ = &(info_vector.back());
}
@@ -93,24 +121,27 @@ OpMetaInfoBuilder& OpMetaInfoBuilder::SetKernelFn(KernelFunc func) {
}
OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferShapeFn(InferShapeFunc func) {
PADDLE_ENFORCE_EQ(
index_, 0UL,
platform::errors::Unimplemented(
"Currently, the InferShapeFn setting of Grad Op is not supported, "
"And backward Tensor `X@GRAD` will use the shape of forward Tensor "
"`X` by default."));
info_ptr_->SetInferShapeFn(std::forward<InferShapeFunc>(func));
return *this;
}
OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferDtypeFn(InferDtypeFunc func) {
PADDLE_ENFORCE_EQ(
index_, 0UL,
platform::errors::Unimplemented(
"Currently, the InferDtypeFn setting of Grad Op is not supported, "
"And backward Tensor `X@GRAD` will use the dtype of forward Tensor "
"`X` by default."));
info_ptr_->SetInferDtypeFn(std::forward<InferDtypeFunc>(func));
return *this;
}
OpMetaInfoBuilder& OpMetaInfoBuilder::SetBackwardOp(
const std::string& bwd_op_name) {
auto& info_vector = OpMetaInfoMap::Instance()[name_];
auto op_meta = OpMetaInfo(bwd_op_name);
info_vector.emplace_back(std::move(op_meta));
info_ptr_ = &(info_vector.back());
return *this;
}
/////////////////////// Op register API /////////////////////////
void RegisterAllCustomOperator() {
......
@@ -153,12 +153,21 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx,
}
VLOG(1) << "Run ComputeFunc.";
auto outs = func(custom_ins, custom_attrs);
try {
auto outs = func(custom_ins, custom_attrs);
VLOG(1) << "Custom Operator: Share outputs into ExecutionContext.";
for (size_t i = 0; i < outputs.size(); ++i) {
auto* true_out = ctx.Output<Tensor>(outputs[i]);
CustomTensorUtils::ShareDataTo(outs.at(i), true_out);
VLOG(1) << "Custom Operator: Share outputs into ExecutionContext.";
for (size_t i = 0; i < outputs.size(); ++i) {
auto* true_out = ctx.Output<Tensor>(outputs[i]);
CustomTensorUtils::ShareDataTo(outs.at(i), true_out);
}
} catch (platform::EnforceNotMet& exception) {
throw std::move(exception);
} catch (std::exception& ex) {
PADDLE_THROW(platform::errors::External("%s", ex.what()));
} catch (...) {
PADDLE_THROW(platform::errors::Fatal(
"Custom operator raises an unknown exception in rumtime."));
}
}
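The effect of this wrapping, sketched with a hypothetical failing kernel: a `std::exception` escaping the user kernel is re-raised as a Paddle `External` error instead of terminating the process, while `EnforceNotMet` propagates unchanged.

```cpp
// Hypothetical kernel, for illustration only.
std::vector<paddle::Tensor> FailingKernel(const paddle::Tensor& x) {
  throw std::runtime_error("bad input");  // surfaces as platform::errors::External
}
```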
@@ -475,58 +484,108 @@ void RegisterOperatorWithMetaInfo(
op_name, info.proto_->InitializationErrorString()));
// InferShape
PADDLE_ENFORCE_NOT_NULL(
infer_shape_func,
platform::errors::PreconditionNotMet(
"InferShapeFn is nullptr. Need to set the InferShapeFn of custom "
"operator by .SetInferShapeFn(PD_INFER_SHAPE(...))"));
info.infer_shape_ = [op_inputs, op_outputs,
infer_shape_func](InferShapeContext* ctx) {
std::vector<std::vector<int64_t>> input_shapes;
VLOG(1) << "Custom Operator: InferShape - get input ddim.";
for (auto& in_name : op_inputs) {
OP_INOUT_CHECK(ctx->HasInput(in_name), "Input", in_name, "Custom");
auto ddim = ctx->GetInputDim(in_name);
input_shapes.emplace_back(framework::vectorize(ddim));
}
if (infer_shape_func == nullptr) {
// use default InferShape
info.infer_shape_ = [op_inputs, op_outputs](InferShapeContext* ctx) {
PADDLE_ENFORCE_EQ(
op_inputs.size(), 1UL,
platform::errors::Unavailable(
"Your custom operator contains multiple inputs. "
"We only allow a custom operator that contains only one input "
"and "
"only one output without setting the InferShapeFn. At this time, "
"the input shape will be directly set to the output shape.\n"
"Please set the InferShapeFn of custom "
"operator by .SetInferShapeFn(PD_INFER_SHAPE(...))"));
PADDLE_ENFORCE_EQ(
op_outputs.size(), 1UL,
platform::errors::Unavailable(
"Your custom operator contains multiple outputs. "
"We only allow a custom operator that contains only one input "
"and "
"only one output without setting the InferShapeFn. At this time, "
"the input shape will be directly set to the output shape.\n"
"Please set the InferShapeFn of custom "
"operator by .SetInferShapeFn(PD_INFER_SHAPE(...))"));
VLOG(1) << "Custom Operator: Default InferShape - share ddim.";
ctx->ShareDim(op_inputs[0], op_outputs[0]);
};
} else {
info.infer_shape_ = [op_inputs, op_outputs,
infer_shape_func](InferShapeContext* ctx) {
std::vector<std::vector<int64_t>> input_shapes;
VLOG(1) << "Custom Operator: InferShape - get input ddim.";
for (auto& in_name : op_inputs) {
OP_INOUT_CHECK(ctx->HasInput(in_name), "Input", in_name, "Custom");
auto ddim = ctx->GetInputDim(in_name);
input_shapes.emplace_back(framework::vectorize(ddim));
}
VLOG(1) << "Custom Operator: InferShape - calc output ddim.";
auto output_shapes = infer_shape_func(input_shapes);
VLOG(1) << "Custom Operator: InferShape - calc output ddim.";
auto output_shapes = infer_shape_func(input_shapes);
VLOG(1) << "Custom Operator: InferShape - set output ddim.";
for (size_t i = 0; i < op_outputs.size(); ++i) {
ctx->SetOutputDim(op_outputs[i], framework::make_ddim(output_shapes[i]));
}
};
VLOG(1) << "Custom Operator: InferShape - set output ddim.";
for (size_t i = 0; i < op_outputs.size(); ++i) {
ctx->SetOutputDim(op_outputs[i],
framework::make_ddim(output_shapes[i]));
}
};
}
// Infer Dtype
PADDLE_ENFORCE_NOT_NULL(
infer_dtype_func,
platform::errors::PreconditionNotMet(
"InferDtypeFn is nullptr. Need to set the InferDtypeFn of custom "
"operator by .SetInferDtypeFn(PD_INFER_DTYPE(...))"));
info.infer_var_type_ = [op_inputs, op_outputs,
infer_dtype_func](InferVarTypeContext* ctx) {
std::vector<DataType> input_dtypes;
VLOG(1) << "Custom Operator: InferDtype - get input dtype.";
for (auto& in_name : op_inputs) {
auto dtype = ctx->GetInputDataType(in_name);
input_dtypes.emplace_back(
CustomTensorUtils::ConvertInnerDTypeToEnumDType(dtype));
}
if (infer_dtype_func == nullptr) {
// use default InferDtype
info.infer_var_type_ = [op_inputs, op_outputs](InferVarTypeContext* ctx) {
PADDLE_ENFORCE_EQ(
op_inputs.size(), 1UL,
platform::errors::Unavailable(
"Your custom operator contains multiple inputs. "
"We only allow a custom operator that contains only one input "
"and "
"only one output without setting the InferDtypeFn. At this time, "
"the input dtype will be directly set to the output dtype.\n"
"Please set the InferDtypeFn of custom "
"operator by .SetInferDtypeFn(PD_INFER_DTYPE(...))"));
PADDLE_ENFORCE_EQ(
op_outputs.size(), 1UL,
platform::errors::Unavailable(
"Your custom operator contains multiple outputs. "
"We only allow a custom operator that contains only one input "
"and "
"only one output without setting the InferDtypeFn. At this time, "
"the input dtype will be directly set to the output dtype.\n"
"Please set the InferDtypeFn of custom "
"operator by .SetInferDtypeFn(PD_INFER_DTYPE(...))"));
VLOG(1) << "Custom Operator: InferDtype - share dtype.";
auto dtype = ctx->GetInputDataType(op_inputs[0]);
ctx->SetOutputDataType(op_outputs[0], dtype);
};
} else {
info.infer_var_type_ = [op_inputs, op_outputs,
infer_dtype_func](InferVarTypeContext* ctx) {
std::vector<DataType> input_dtypes;
VLOG(1) << "Custom Operator: InferDtype - get input dtype.";
for (auto& in_name : op_inputs) {
auto dtype = ctx->GetInputDataType(in_name);
input_dtypes.emplace_back(
CustomTensorUtils::ConvertInnerDTypeToEnumDType(dtype));
}
VLOG(1) << "Custom Operator: InferDtype - infer output dtype.";
auto output_dtypes = infer_dtype_func(input_dtypes);
VLOG(1) << "Custom Operator: InferDtype - infer output dtype.";
auto output_dtypes = infer_dtype_func(input_dtypes);
VLOG(1) << "Custom Operator: InferDtype - set output dtype.";
for (size_t i = 0; i < op_outputs.size(); ++i) {
ctx->SetOutputDataType(
op_outputs[i],
CustomTensorUtils::ConvertEnumDTypeToInnerDType(output_dtypes[i]));
}
};
VLOG(1) << "Custom Operator: InferDtype - set output dtype.";
for (size_t i = 0; i < op_outputs.size(); ++i) {
ctx->SetOutputDataType(
op_outputs[i],
CustomTensorUtils::ConvertEnumDTypeToInnerDType(output_dtypes[i]));
}
};
}
// Kernel func
RegisterOperatorKernel(op_name, kernel_fn, op_inputs, op_outputs, op_attrs);
......
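Because this fallback only covers single-input, single-output operators, an op with several outputs still has to provide both functions explicitly. A sketch under that assumption (`MyInferShape`/`MyInferDtype` are hypothetical names; the `multi_out` test below does the same thing):

```cpp
// One input, two outputs: the defaults above would reject this op.
std::vector<std::vector<int64_t>> MyInferShape(std::vector<int64_t> x_shape) {
  return {x_shape, x_shape};  // one shape per output
}

std::vector<paddle::DataType> MyInferDtype(paddle::DataType x_dtype) {
  return {x_dtype, paddle::DataType::FLOAT64};  // one dtype per output
}

// Wired up via the builder:
//   .SetInferShapeFn(PD_INFER_SHAPE(MyInferShape))
//   .SetInferDtypeFn(PD_INFER_DTYPE(MyInferDtype))
```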
@@ -3,10 +3,12 @@ if(WITH_GPU)
# 'test_custom_relu_op_setup/jit' compile .cc and .cu file
py_test(test_custom_relu_op_setup SRCS test_custom_relu_op_setup.py)
py_test(test_custom_relu_op_jit SRCS test_custom_relu_op_jit.py)
py_test(test_custom_relu_model SRCS test_custom_relu_model.py)
# Compiling the shared library takes some time, but running the tests is very fast.
set_tests_properties(test_custom_relu_op_setup PROPERTIES TIMEOUT 250)
set_tests_properties(test_custom_relu_op_jit PROPERTIES TIMEOUT 180)
set_tests_properties(test_custom_relu_model PROPERTIES TIMEOUT 180)
endif()
py_test(test_sysconfig SRCS test_sysconfig.py)
......
@@ -150,15 +150,7 @@ std::vector<paddle::Tensor> AttrTestBackward(
return {grad_x};
}
std::vector<std::vector<int64_t>> InferShape(std::vector<int64_t> x_shape) {
return {x_shape};
}
std::vector<paddle::DataType> InferDType(paddle::DataType x_dtype) {
return {x_dtype};
}
PD_BUILD_OP("attr_test")
PD_BUILD_OP(attr_test)
.Inputs({"X"})
.Outputs({"Out"})
.Attrs({"bool_attr: bool",
@@ -170,10 +162,9 @@ PD_BUILD_OP("attr_test")
"float_vec_attr: std::vector<float>",
"int64_vec_attr: std::vector<int64_t>",
"str_vec_attr: std::vector<std::string>"})
.SetKernelFn(PD_KERNEL(AttrTestForward))
.SetInferShapeFn(PD_INFER_SHAPE(InferShape))
.SetInferDtypeFn(PD_INFER_DTYPE(InferDType))
.SetBackwardOp("attr_test_grad")
.SetKernelFn(PD_KERNEL(AttrTestForward));
PD_BUILD_GRAD_OP(attr_test)
.Inputs({paddle::Grad("Out")})
.Outputs({paddle::Grad("X")})
.Attrs({"int_attr: int",
......
@@ -96,21 +96,12 @@ std::vector<paddle::Tensor> ReluBackward(const paddle::Tensor& x,
}
}
std::vector<std::vector<int64_t>> ReluInferShape(std::vector<int64_t> x_shape) {
return {x_shape};
}
std::vector<paddle::DataType> ReluInferDType(paddle::DataType x_dtype) {
return {x_dtype};
}
PD_BUILD_OP("custom_relu")
PD_BUILD_OP(custom_relu)
.Inputs({"X"})
.Outputs({"Out"})
.SetKernelFn(PD_KERNEL(ReluForward))
.SetInferShapeFn(PD_INFER_SHAPE(ReluInferShape))
.SetInferDtypeFn(PD_INFER_DTYPE(ReluInferDType))
.SetBackwardOp("relu2_grad")
.SetKernelFn(PD_KERNEL(ReluForward));
PD_BUILD_GRAD_OP(custom_relu)
.Inputs({"X", "Out", paddle::Grad("Out")})
.Outputs({paddle::Grad("X")})
.SetKernelFn(PD_KERNEL(ReluBackward));
@@ -25,19 +25,14 @@ std::vector<paddle::Tensor> ReluBackward(const paddle::Tensor& x,
const paddle::Tensor& out,
const paddle::Tensor& grad_out);
std::vector<std::vector<int64_t>> ReluInferShape(std::vector<int64_t> x_shape);
std::vector<paddle::DataType> ReluInferDType(paddle::DataType x_dtype);
// Reuse code in `custom_relu_op.cc/cu` to register another custom operator,
// to test jointly compiling multiple operators at the same time.
PD_BUILD_OP("custom_relu_dup")
PD_BUILD_OP(custom_relu_dup)
.Inputs({"X"})
.Outputs({"Out"})
.SetKernelFn(PD_KERNEL(ReluForward))
.SetInferShapeFn(PD_INFER_SHAPE(ReluInferShape))
.SetInferDtypeFn(PD_INFER_DTYPE(ReluInferDType))
.SetBackwardOp("relu3_grad")
.SetKernelFn(PD_KERNEL(ReluForward));
PD_BUILD_GRAD_OP(custom_relu_dup)
.Inputs({"X", "Out", paddle::Grad("Out")})
.Outputs({paddle::Grad("X")})
.SetKernelFn(PD_KERNEL(ReluBackward));
@@ -26,14 +26,6 @@ void assign_cpu_kernel(const data_t* x_data,
}
}
std::vector<std::vector<int64_t>> InferShape(std::vector<int64_t> x_shape) {
return {x_shape};
}
std::vector<paddle::DataType> InferDType(paddle::DataType x_dtype) {
return {x_dtype};
}
std::vector<paddle::Tensor> DispatchTestInterger(const paddle::Tensor& x) {
auto out = paddle::Tensor(paddle::PlaceType::kCPU);
out.reshape(x.shape());
@@ -47,12 +39,10 @@ std::vector<paddle::Tensor> DispatchTestInterger(const paddle::Tensor& x) {
return {out};
}
PD_BUILD_OP("dispatch_test_integer")
PD_BUILD_OP(dispatch_test_integer)
.Inputs({"X"})
.Outputs({"Out"})
.SetKernelFn(PD_KERNEL(DispatchTestInterger))
.SetInferShapeFn(PD_INFER_SHAPE(InferShape))
.SetInferDtypeFn(PD_INFER_DTYPE(InferDType));
.SetKernelFn(PD_KERNEL(DispatchTestInterger));
std::vector<paddle::Tensor> DispatchTestComplex(const paddle::Tensor& x) {
auto out = paddle::Tensor(paddle::PlaceType::kCPU);
@@ -67,12 +57,10 @@ std::vector<paddle::Tensor> DispatchTestComplex(const paddle::Tensor& x) {
return {out};
}
PD_BUILD_OP("dispatch_test_complex")
PD_BUILD_OP(dispatch_test_complex)
.Inputs({"X"})
.Outputs({"Out"})
.SetKernelFn(PD_KERNEL(DispatchTestComplex))
.SetInferShapeFn(PD_INFER_SHAPE(InferShape))
.SetInferDtypeFn(PD_INFER_DTYPE(InferDType));
.SetKernelFn(PD_KERNEL(DispatchTestComplex));
std::vector<paddle::Tensor> DispatchTestFloatAndInteger(
const paddle::Tensor& x) {
@@ -88,12 +76,10 @@ std::vector<paddle::Tensor> DispatchTestFloatAndInteger(
return {out};
}
PD_BUILD_OP("dispatch_test_float_and_integer")
PD_BUILD_OP(dispatch_test_float_and_integer)
.Inputs({"X"})
.Outputs({"Out"})
.SetKernelFn(PD_KERNEL(DispatchTestFloatAndInteger))
.SetInferShapeFn(PD_INFER_SHAPE(InferShape))
.SetInferDtypeFn(PD_INFER_DTYPE(InferDType));
.SetKernelFn(PD_KERNEL(DispatchTestFloatAndInteger));
std::vector<paddle::Tensor> DispatchTestFloatAndComplex(
const paddle::Tensor& x) {
@@ -109,12 +95,10 @@ std::vector<paddle::Tensor> DispatchTestFloatAndComplex(
return {out};
}
PD_BUILD_OP("dispatch_test_float_and_complex")
PD_BUILD_OP(dispatch_test_float_and_complex)
.Inputs({"X"})
.Outputs({"Out"})
.SetKernelFn(PD_KERNEL(DispatchTestFloatAndComplex))
.SetInferShapeFn(PD_INFER_SHAPE(InferShape))
.SetInferDtypeFn(PD_INFER_DTYPE(InferDType));
.SetKernelFn(PD_KERNEL(DispatchTestFloatAndComplex));
std::vector<paddle::Tensor> DispatchTestFloatAndIntegerAndComplex(
const paddle::Tensor& x) {
@@ -130,9 +114,7 @@ std::vector<paddle::Tensor> DispatchTestFloatAndIntegerAndComplex(
return {out};
}
PD_BUILD_OP("dispatch_test_float_and_integer_and_complex")
PD_BUILD_OP(dispatch_test_float_and_integer_and_complex)
.Inputs({"X"})
.Outputs({"Out"})
.SetKernelFn(PD_KERNEL(DispatchTestFloatAndIntegerAndComplex))
.SetInferShapeFn(PD_INFER_SHAPE(InferShape))
.SetInferDtypeFn(PD_INFER_DTYPE(InferDType));
.SetKernelFn(PD_KERNEL(DispatchTestFloatAndIntegerAndComplex));
@@ -68,7 +68,7 @@ std::vector<paddle::DataType> InferDtype(paddle::DataType x_dtype) {
return {x_dtype, paddle::DataType::FLOAT64, paddle::DataType::INT32};
}
PD_BUILD_OP("multi_out")
PD_BUILD_OP(multi_out)
.Inputs({"X"})
.Outputs({"Out", "Fake_float64", "ZFake_int32"})
.SetKernelFn(PD_KERNEL(MultiOutCPU))
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import numpy as np
import paddle
from paddle import nn
from paddle.utils.cpp_extension import load, get_build_directory
from paddle.utils.cpp_extension.extension_utils import run_cmd
from utils import paddle_includes, extra_compile_args
# Because Windows doesn't use docker, the shared lib may already exist in the
# cache dir, and it will not be compiled again unless the shared lib is removed.
file = '{}\\custom_relu_for_model_jit\\custom_relu_for_model_jit.pyd'.format(
get_build_directory())
if os.name == 'nt' and os.path.isfile(file):
cmd = 'del {}'.format(file)
run_cmd(cmd, True)
# Compile and load custom op Just-In-Time.
# custom_relu_op_dup.cc is only used for the multi-op test and is not
# a new op; if you want to test only one op, remove this
# source file.
custom_module = load(
name='custom_relu_for_model_jit',
sources=['custom_relu_op.cc', 'custom_relu_op.cu'],
extra_include_paths=paddle_includes, # add for Coverage CI
extra_cxx_cflags=extra_compile_args, # add for Coverage CI
extra_cuda_cflags=extra_compile_args, # add for Coverage CI
verbose=True)
class Net(nn.Layer):
"""
A simple example of a regression model.
"""
def __init__(self, in_dim, out_dim, use_custom_op=False):
super(Net, self).__init__()
self.fc1 = nn.Linear(in_dim, in_dim)
self.fc2 = nn.Linear(in_dim, out_dim)
self.relu_act = custom_module.custom_relu if use_custom_op else nn.functional.relu
def forward(self, x):
out = self.fc1(x)
out = self.relu_act(out)
out = self.fc2(out)
out = self.relu_act(out)
out = paddle.mean(out, axis=-1)
return out
class TestDygraphModel(unittest.TestCase):
def setUp(self):
self.seed = 2021
self.in_dim = 10
self.out_dim = 64
self.batch_num = 10
self.batch_size = 4
self.datas = [
np.random.uniform(
size=[self.batch_size, self.in_dim]).astype('float32')
for i in range(self.batch_num)
]
self.labels = [
np.random.uniform(size=[self.batch_size, 1]).astype('float32')
for i in range(self.batch_num)
]
self.devices = ['cpu', 'gpu']
# for saving model
self.model_path_template = "infer_model/custom_relu_dygaph_model_{}.pdparams"
self.model_dy2stat_path = "infer_model/custom_relu_model_dy2sta"
# for dy2stat
self.x_spec = paddle.static.InputSpec(
shape=[None, self.in_dim], dtype='float32', name='x')
def test_train_eval(self):
for device in self.devices:
# set device
paddle.set_device(device)
# for train
origin_relu_train_out = self.train_model(use_custom_op=False)
custom_relu_train_out = self.train_model(use_custom_op=True)
custom_relu_dy2stat_train_out = self.train_model(
use_custom_op=True, dy2stat=True) # for to_static
self.assertTrue(
np.array_equal(origin_relu_train_out, custom_relu_train_out))
self.assertTrue(
np.array_equal(origin_relu_train_out,
custom_relu_dy2stat_train_out))
# for eval
origin_relu_eval_out = self.eval_model(use_custom_op=False)
custom_relu_eval_out = self.eval_model(use_custom_op=True)
custom_relu_dy2stat_eval_out = self.eval_model(
use_custom_op=True, dy2stat=True) # for to_static
self.assertTrue(
np.array_equal(origin_relu_eval_out, custom_relu_eval_out))
self.assertTrue(
np.array_equal(origin_relu_eval_out,
custom_relu_dy2stat_eval_out))
def train_model(self, use_custom_op=False, dy2stat=False):
# reset random seed
paddle.seed(self.seed)
np.random.seed(self.seed)
# paddle.framework.random._manual_program_seed(SEED)
net = Net(self.in_dim, self.out_dim, use_custom_op)
if dy2stat:
net = paddle.jit.to_static(net, input_spec=[self.x_spec])
mse_loss = paddle.nn.MSELoss()
sgd = paddle.optimizer.SGD(learning_rate=0.1,
parameters=net.parameters())
for batch_id in range(self.batch_num):
x = paddle.to_tensor(self.datas[batch_id])
y = paddle.to_tensor(self.labels[batch_id])
out = net(x)
loss = mse_loss(out, y)
loss.backward()
sgd.minimize(loss)
net.clear_gradients()
# save inference model
net.eval()
if dy2stat:
paddle.jit.save(net, self.model_dy2stat_path)
else:
paddle.save(net.state_dict(),
self.model_path_template.format(use_custom_op))
return out.numpy()
def eval_model(self, use_custom_op=False, dy2stat=False):
net = Net(self.in_dim, self.out_dim, use_custom_op)
if dy2stat:
net = paddle.jit.load(self.model_dy2stat_path)
else:
state_dict = paddle.load(
self.model_path_template.format(use_custom_op))
net.set_state_dict(state_dict)
sample_x = paddle.to_tensor(self.datas[0])
net.eval()
out = net(sample_x)
return out.numpy()
class TestStaticModel(unittest.TestCase):
def setUp(self):
self.seed = 2021
self.in_dim = 10
self.out_dim = 64
self.batch_num = 10
self.batch_size = 8
self.datas = [
np.random.uniform(
size=[self.batch_size, self.in_dim]).astype('float32')
for i in range(self.batch_num)
]
self.labels = [
np.random.uniform(size=[self.batch_size, 1]).astype('float32')
for i in range(self.batch_num)
]
self.devices = ['cpu', 'gpu']
# for saving model
self.model_path_template = "infer_model/custom_relu_static_model_{}_{}"
paddle.enable_static()
def tearDown(self):
paddle.disable_static()
def test_train_eval(self):
for device in self.devices:
# for train
original_relu_train_out = self.train_model(
device, use_custom_op=False)
custom_relu_train_out = self.train_model(device, use_custom_op=True)
# using PE
original_relu_train_pe_out = self.train_model(
device, use_custom_op=False, use_pe=True)
custom_relu_train_pe_out = self.train_model(
device, use_custom_op=True, use_pe=True)
print(original_relu_train_out)
print(custom_relu_train_out)
print(original_relu_train_pe_out)
print(custom_relu_train_pe_out)
self.assertTrue(
np.array_equal(original_relu_train_out, custom_relu_train_out))
self.assertTrue(
np.array_equal(original_relu_train_pe_out,
custom_relu_train_pe_out))
# for eval
original_relu_eval_out = self.eval_model(
device, use_custom_op=False)
custom_relu_eval_out = self.eval_model(device, use_custom_op=True)
# using PE
original_relu_eval_pe_out = self.eval_model(
device, use_custom_op=False, use_pe=True)
custom_relu_eval_pe_out = self.eval_model(
device, use_custom_op=True, use_pe=True)
print(original_relu_eval_out)
print(custom_relu_eval_out)
print(original_relu_eval_pe_out)
print(custom_relu_eval_pe_out)
self.assertTrue(
np.array_equal(original_relu_eval_out, custom_relu_eval_out))
self.assertTrue(
np.array_equal(original_relu_eval_pe_out,
custom_relu_eval_pe_out))
def train_model(self, device, use_custom_op=False, use_pe=False):
# reset random seed
paddle.seed(self.seed)
np.random.seed(self.seed)
# set device
paddle.set_device(device)
with paddle.static.scope_guard(paddle.static.Scope()):
with paddle.static.program_guard(paddle.static.Program()):
x = paddle.static.data(
shape=[None, self.in_dim], name='x', dtype='float32')
y = paddle.static.data(
shape=[None, 1], name='y', dtype='float32')
net = Net(self.in_dim, self.out_dim, use_custom_op)
out = net(x)
loss = nn.functional.mse_loss(out, y)
sgd = paddle.optimizer.SGD(learning_rate=0.01)
sgd.minimize(loss)
exe = paddle.static.Executor()
exe.run(paddle.static.default_startup_program())
# For PE
if use_pe:
places = paddle.static.cpu_places(
) if device == 'cpu' else paddle.static.cuda_places()
main_program = paddle.static.CompiledProgram(
paddle.static.default_main_program(
)).with_data_parallel(
loss_name=loss.name, places=places)
else:
main_program = paddle.static.default_main_program()
for batch_id in range(self.batch_num):
x_data = self.datas[batch_id]
y_data = self.labels[batch_id]
res = exe.run(main_program,
feed={'x': x_data,
'y': y_data},
fetch_list=[out])
# save model
paddle.static.save_inference_model(
self.model_path_template.format(use_custom_op, use_pe),
[x], [out], exe)
return res[0]
def eval_model(self, device, use_custom_op=False, use_pe=False):
paddle.set_device(device)
with paddle.static.scope_guard(paddle.static.Scope()):
with paddle.static.program_guard(paddle.static.Program()):
exe = paddle.static.Executor()
[inference_program, feed_target_names,
fetch_targets] = paddle.static.load_inference_model(
self.model_path_template.format(use_custom_op, use_pe),
exe)
x_data = self.datas[0]
results = exe.run(inference_program,
feed={feed_target_names[0]: x_data},
fetch_list=fetch_targets)
return results[0]
if __name__ == '__main__':
unittest.main()
@@ -22,11 +22,14 @@ from setuptools.command.easy_install import easy_install
from setuptools.command.build_ext import build_ext
from distutils.command.build import build
from .extension_utils import find_cuda_home, normalize_extension_kwargs, add_compile_flag, bootstrap_context
from .extension_utils import is_cuda_file, prepare_unix_cudaflags, prepare_win_cudaflags, add_std_without_repeat, get_build_directory
from .extension_utils import _import_module_from_library, CustomOpInfo, _write_setup_file, _jit_compile, parse_op_name_from
from .extension_utils import check_abi_compatibility, log_v, IS_WINDOWS, OS_NAME
from .extension_utils import use_new_custom_op_load_method, MSVC_COMPILE_FLAGS
from .extension_utils import find_cuda_home, normalize_extension_kwargs, add_compile_flag
from .extension_utils import is_cuda_file, prepare_unix_cudaflags, prepare_win_cudaflags
from .extension_utils import _import_module_from_library, _write_setup_file, _jit_compile
from .extension_utils import check_abi_compatibility, log_v, CustomOpInfo, parse_op_name_from
from .extension_utils import use_new_custom_op_load_method, clean_object_if_change_cflags
from .extension_utils import bootstrap_context, get_build_directory, add_std_without_repeat
from .extension_utils import IS_WINDOWS, OS_NAME, MSVC_COMPILE_FLAGS
# Note(zhouwei): On windows, it will export function 'PyInit_[name]' by default,
# The solution is: 1.User add function PyInit_[name] 2. set not to export
@@ -357,6 +360,13 @@ class BuildExtension(build_ext, object):
def build_extensions(self):
self._check_abi()
# Note(Aurelius84): If the source has been compiled before, we should check whether
# the cflags have changed and delete the built shared library to re-compile the
# source, even though the source file content is unchanged.
so_name = self.get_ext_fullpath(self.extensions[0].name)
clean_object_if_change_cflags(
os.path.abspath(so_name), self.extensions[0])
# Consider .cu, .cu.cc as valid source extensions.
self.compiler.src_extensions += ['.cu', '.cu.cc']
# Save the original _compile method for later.
......
@@ -16,7 +16,9 @@ import os
import re
import six
import sys
import json
import glob
import hashlib
import logging
import collections
import textwrap
@@ -219,6 +221,106 @@ class CustomOpInfo:
return next(reversed(self.op_info_map.items()))
VersionFields = collections.namedtuple('VersionFields', [
'sources',
'extra_compile_args',
'extra_link_args',
'library_dirs',
'runtime_library_dirs',
'include_dirs',
'define_macros',
'undef_macros',
])
class VersionManager:
def __init__(self, version_field):
self.version_field = version_field
self.version = self.hasher(version_field)
def hasher(self, version_field):
from paddle.fluid.layers.utils import flatten
md5 = hashlib.md5()
for field in version_field._fields:
elem = getattr(version_field, field)
if not elem: continue
if isinstance(elem, (list, tuple, dict)):
flat_elem = flatten(elem)
md5 = combine_hash(md5, tuple(flat_elem))
else:
raise RuntimeError(
"Support types with list, tuple and dict, but received {} with {}.".
format(type(elem), elem))
return md5.hexdigest()
@property
def details(self):
return self.version_field._asdict()
def combine_hash(md5, value):
"""
Return a new hash value.
DO NOT use `hash()` because it doesn't generate stable values across processes.
See https://stackoverflow.com/questions/27522626/hash-function-in-python-3-3-returns-different-results-between-sessions
"""
md5.update(repr(value).encode())
return md5
def clean_object_if_change_cflags(so_path, extension):
"""
If the source has been compiled before, we should check whether the cflags
have changed and delete the built object to re-compile the source,
even though the source file content is unchanged.
"""
def serialize(path, version_info):
assert isinstance(version_info, dict)
with open(path, 'w') as f:
f.write(json.dumps(version_info, indent=4, sort_keys=True))
def deserialize(path):
assert os.path.exists(path)
with open(path, 'r') as f:
content = f.read()
return json.loads(content)
# version file
VERSION_FILE = "version.txt"
base_dir = os.path.dirname(so_path)
so_name = os.path.basename(so_path)
version_file = os.path.join(base_dir, VERSION_FILE)
# version info
args = [getattr(extension, field, None) for field in VersionFields._fields]
version_field = VersionFields._make(args)
versioner = VersionManager(version_field)
if os.path.exists(so_path) and os.path.exists(version_file):
old_version_info = deserialize(version_file)
so_version = old_version_info.get(so_name, None)
# delete the shared library file if the version has changed, to re-compile it.
if so_version is not None and so_version != versioner.version:
log_v(
"Re-Compiling {}, because specified cflags have been changed. New signature {} has been saved into {}.".
format(so_name, versioner.version, version_file))
os.remove(so_path)
# update new version information
new_version_info = versioner.details
new_version_info[so_name] = versioner.version
serialize(version_file, new_version_info)
else:
# If compiling for the first time, save detailed compilation info for debugging.
if not os.path.exists(base_dir):
os.makedirs(base_dir)
details = versioner.details
details[so_name] = versioner.version
serialize(version_file, details)
def prepare_unix_cudaflags(cflags):
"""
Prepare all the necessary compile flags for compiling CUDA files with nvcc.
......