Unverified commit 40cd5271, authored by zyfncg, committed via GitHub

Generate static graph code for some activation ops by Yaml (part3) (#47640)

* generate static graph code for some activation ops

* fix bug

* fix infermeta of selected_rows
Parent commit: 2cff0e8a
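This commit continues the migration of activation operators to YAML-driven static graph code generation: the forward and backward definitions move out of the legacy YAML files into the generated op/backward YAML files, name and attribute mappings to the legacy fluid operator interface are added to op_compat.yaml, and the now-redundant hand-written functor registrations, argument mappings, and the ELU grad kernel are removed. As a rough orientation, the sketch below shows how the generator's YAML inputs could be inspected; the file paths are assumptions based on Paddle's layout at the time of this commit, not part of the diff.

# Illustrative only: peek at the YAML definitions the static-graph code
# generator consumes. Paths are assumed (paddle/phi/api/yaml/...), not
# taken from this diff.
import yaml

with open("paddle/phi/api/yaml/ops.yaml") as f:
    ops = yaml.safe_load(f)
with open("paddle/phi/api/yaml/op_compat.yaml") as f:
    compat = yaml.safe_load(f)

celu = next(item for item in ops if item["op"] == "celu")            # migrated forward op
celu_compat = next(item for item in compat if item["op"] == "celu")  # legacy-name mapping

print(celu["args"])             # "(Tensor x, float alpha = 1.0)"
print(celu_compat["backward"])  # "celu_grad, celu_double_grad(celu_grad_grad)"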
...@@ -407,6 +407,8 @@ void CompatMetaTensor::share_dims(const MetaTensor& meta_tensor) { ...@@ -407,6 +407,8 @@ void CompatMetaTensor::share_dims(const MetaTensor& meta_tensor) {
static_cast<const CompatMetaTensor&>(meta_tensor).GetSelectedRows(); static_cast<const CompatMetaTensor&>(meta_tensor).GetSelectedRows();
selected_rows->set_rows(input_selected_rows.rows()); selected_rows->set_rows(input_selected_rows.rows());
selected_rows->set_height(input_selected_rows.height()); selected_rows->set_height(input_selected_rows.height());
phi::DenseTensorUtils::GetMutableMeta(selected_rows->mutable_value())
->dims = input_selected_rows.value().dims();
} }
} }
} }
......
...@@ -266,57 +266,19 @@ using BReluFunctor = phi::funcs::HardTanhFunctor<T>; ...@@ -266,57 +266,19 @@ using BReluFunctor = phi::funcs::HardTanhFunctor<T>;
template <typename T> template <typename T>
using BReluGradFunctor = phi::funcs::HardTanhGradFunctor<T>; using BReluGradFunctor = phi::funcs::HardTanhGradFunctor<T>;
USE_PHI_FUNCTOR(Cos)
USE_PHI_FUNCTOR(Tan)
USE_PHI_FUNCTOR(Acos)
USE_PHI_FUNCTOR(Sin)
USE_PHI_FUNCTOR(Asin)
USE_PHI_FUNCTOR(Atan)
USE_PHI_FUNCTOR(Sinh)
USE_PHI_FUNCTOR(Cosh)
USE_PHI_FUNCTOR(Asinh)
USE_PHI_FUNCTOR(Acosh)
USE_PHI_FUNCTOR(Atanh)
USE_PHI_FUNCTOR(Tanh) USE_PHI_FUNCTOR(Tanh)
USE_PHI_DOUBLE_GRAD_FUNCTOR(Tanh)
USE_PHI_TRIPLE_GRAD_FUNCTOR(Tanh)
USE_PHI_FUNCTOR(ThresholdedRelu)
USE_PHI_FUNCTOR(Relu6) USE_PHI_FUNCTOR(Relu6)
USE_PHI_FUNCTOR(LeakyRelu) USE_PHI_FUNCTOR(LeakyRelu)
USE_PHI_DOUBLE_GRAD_FUNCTOR(LeakyRelu) USE_PHI_DOUBLE_GRAD_FUNCTOR(LeakyRelu)
USE_PHI_FUNCTOR(HardShrink) USE_PHI_FUNCTOR(HardShrink)
USE_PHI_FUNCTOR(SoftShrink)
USE_PHI_FUNCTOR(TanhShrink)
USE_PHI_FUNCTOR(Silu)
USE_PHI_FUNCTOR(ELU) USE_PHI_FUNCTOR(ELU)
USE_PHI_DOUBLE_GRAD_FUNCTOR(ELU)
USE_PHI_FUNCTOR(Softsign)
USE_PHI_FUNCTOR(Sigmoid) USE_PHI_FUNCTOR(Sigmoid)
USE_PHI_DOUBLE_GRAD_FUNCTOR(Sigmoid)
USE_PHI_TRIPLE_GRAD_FUNCTOR(Sigmoid)
USE_PHI_FUNCTOR(LogSigmoid)
USE_PHI_FUNCTOR(HardSigmoid) USE_PHI_FUNCTOR(HardSigmoid)
USE_PHI_FUNCTOR(Log)
USE_PHI_DOUBLE_GRAD_FUNCTOR(Log)
USE_PHI_FUNCTOR(Log2)
USE_PHI_FUNCTOR(Log10)
USE_PHI_FUNCTOR(Log1p)
USE_PHI_FUNCTOR(Swish) USE_PHI_FUNCTOR(Swish)
USE_PHI_FUNCTOR(HardSwish) USE_PHI_FUNCTOR(HardSwish)
USE_PHI_FUNCTOR(Pow) USE_PHI_FUNCTOR(Pow)
USE_PHI_FUNCTOR(Expm1)
USE_PHI_FUNCTOR(Mish) USE_PHI_FUNCTOR(Mish)
USE_PHI_FUNCTOR(STanh) USE_PHI_FUNCTOR(STanh)
USE_PHI_FUNCTOR(Reciprocal)
USE_PHI_FUNCTOR(Square)
USE_PHI_DOUBLE_GRAD_FUNCTOR(Square)
USE_PHI_FUNCTOR(Sqrt)
USE_PHI_DOUBLE_GRAD_FUNCTOR(Sqrt)
USE_PHI_FUNCTOR(Rsqrt)
USE_PHI_DOUBLE_GRAD_FUNCTOR(Rsqrt)
USE_PHI_FUNCTOR(Softplus)
USE_PHI_FUNCTOR(CELU)
USE_PHI_DOUBLE_GRAD_FUNCTOR(CELU)
template <typename T> template <typename T>
using ELUGradNegativeAlphaFunctor = phi::funcs::ELUGradNegativeAlphaFunctor<T>; using ELUGradNegativeAlphaFunctor = phi::funcs::ELUGradNegativeAlphaFunctor<T>;
...@@ -386,40 +348,6 @@ struct SoftReluGradFunctor : public BaseActivationFunctor<T> { ...@@ -386,40 +348,6 @@ struct SoftReluGradFunctor : public BaseActivationFunctor<T> {
} }
}; };
template <typename DeviceContext, typename T>
class ELUGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* X = context.Input<phi::DenseTensor>("X");
auto* Out = context.Input<phi::DenseTensor>("Out");
auto* dOut = context.Input<phi::DenseTensor>(framework::GradVarName("Out"));
auto* dX = context.Output<phi::DenseTensor>(framework::GradVarName("X"));
const float alpha = context.Attr<float>("alpha");
dX->mutable_data<T>(context.GetPlace());
auto x = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(X, "Input", "X", "elu_grad"));
auto out = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(Out, "Input", "Out", "elu_grad"));
auto dout = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(dOut, "Input", "dOut", "elu_grad"));
auto dx = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(dX, "Output", "dX", "elu_grad"));
auto* place =
context.template device_context<DeviceContext>().eigen_device();
if (alpha > 0) {
ELUGradFunctor<T> functor;
functor.alpha = alpha;
functor(*place, x, out, dout, dx);
} else {
ELUGradNegativeAlphaFunctor<T> functor;
functor.alpha = alpha;
functor(*place, x, out, dout, dx);
}
}
};
template <typename T> template <typename T>
struct AbsGradGradFunctor : public BaseActivationFunctor<T> { struct AbsGradGradFunctor : public BaseActivationFunctor<T> {
template <typename Device> template <typename Device>
......
...@@ -8,6 +8,7 @@ cc_test_old( ...@@ -8,6 +8,7 @@ cc_test_old(
shape_op shape_op
crop_op crop_op
activation_op activation_op
generated_op
pooling pooling
transpose_op transpose_op
scope scope
......
...@@ -117,6 +117,29 @@ ...@@ -117,6 +117,29 @@
func : ceil_grad func : ceil_grad
inplace : (out_grad -> x_grad) inplace : (out_grad -> x_grad)
- backward_op : celu_double_grad
forward : celu_grad(Tensor x, Tensor grad_out, float alpha) -> Tensor(grad_x)
args : (Tensor x, Tensor grad_out, Tensor grad_x_grad, float alpha)
output : Tensor(x_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [x, x]
kernel :
func : celu_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : celu_grad
forward : celu(Tensor x, float alpha) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float alpha)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param: [x]
kernel :
func : celu_grad
backward : celu_double_grad
inplace : (out_grad -> x_grad)
- backward_op : cholesky_grad - backward_op : cholesky_grad
forward : cholesky (Tensor x, bool upper) -> Tensor(out) forward : cholesky (Tensor x, bool upper) -> Tensor(out)
args : (Tensor out, Tensor out_grad, bool upper) args : (Tensor out, Tensor out_grad, bool upper)
...@@ -236,6 +259,29 @@ ...@@ -236,6 +259,29 @@
func : dot_grad func : dot_grad
data_type : out_grad data_type : out_grad
- backward_op : elu_double_grad
forward : elu_grad (Tensor x, Tensor out, Tensor grad_out, float alpha)-> Tensor(grad_x)
args : (Tensor x, Tensor grad_out, Tensor grad_x_grad, float alpha)
output : Tensor(x_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [x, x]
kernel :
func : elu_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : elu_grad
forward : elu (Tensor x, float alpha) -> Tensor(out)
args : (Tensor x, Tensor out, Tensor out_grad, float alpha)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : elu_grad
backward : elu_double_grad
inplace : (out_grad -> x_grad)
- backward_op : erf_grad - backward_op : erf_grad
forward : erf (Tensor x) -> Tensor(out) forward : erf (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad) args : (Tensor x, Tensor out_grad)
...@@ -350,6 +396,29 @@ ...@@ -350,6 +396,29 @@
func : hard_sigmoid_grad func : hard_sigmoid_grad
inplace : (out_grad -> x_grad) inplace : (out_grad -> x_grad)
- backward_op : leaky_relu_double_grad
forward : leaky_relu_grad (Tensor x, Tensor grad_out, float negative_slope) -> Tensor(grad_x)
args : (Tensor x, Tensor grad_x_grad, float negative_slope)
output : Tensor(grad_out_grad)
infer_meta :
func : UnchangedInferMeta
param : [grad_x_grad]
kernel :
func : leaky_relu_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : leaky_relu_grad
forward : leaky_relu (Tensor x, float negative_slope) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float negative_slope)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : leaky_relu_grad
backward : leaky_relu_double_grad
inplace : (out_grad -> x_grad)
- backward_op : lgamma_grad - backward_op : lgamma_grad
forward : lgamma(Tensor x) -> Tensor(out) forward : lgamma(Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad) args : (Tensor x, Tensor out_grad)
...@@ -393,6 +462,29 @@ ...@@ -393,6 +462,29 @@
func : log2_grad func : log2_grad
inplace : (out_grad -> x_grad) inplace : (out_grad -> x_grad)
- backward_op : log_double_grad
forward : log_grad (Tensor x, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor x, Tensor grad_out, Tensor grad_x_grad)
output : Tensor(x_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [x, x]
kernel :
func : log_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : log_grad
forward : log (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : log_grad
backward : log_double_grad
inplace : (out_grad -> x_grad)
- backward_op : logit_grad - backward_op : logit_grad
forward : logit (Tensor x, float eps = 1e-6f) -> Tensor(out) forward : logit (Tensor x, float eps = 1e-6f) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float eps) args : (Tensor x, Tensor out_grad, float eps)
...@@ -445,6 +537,29 @@ ...@@ -445,6 +537,29 @@
func : reciprocal_grad func : reciprocal_grad
inplace : (out_grad -> x_grad) inplace : (out_grad -> x_grad)
- backward_op : relu_double_grad
forward : relu_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor out, Tensor grad_x_grad)
output : Tensor(grad_out_grad)
infer_meta :
func : UnchangedInferMeta
param : [out]
kernel :
func : relu_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : relu_grad
forward : relu (Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [out]
kernel :
func : relu_grad
backward: relu_double_grad
inplace : (out_grad -> x_grad)
- backward_op : round_grad - backward_op : round_grad
forward : round(Tensor x) -> Tensor(out) forward : round(Tensor x) -> Tensor(out)
args : (Tensor out_grad) args : (Tensor out_grad)
...@@ -456,6 +571,29 @@ ...@@ -456,6 +571,29 @@
func : round_grad func : round_grad
inplace : (out_grad -> x_grad) inplace : (out_grad -> x_grad)
- backward_op : rsqrt_double_grad
forward : rsqrt_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor out, Tensor grad_x, Tensor grad_x_grad)
output : Tensor(out_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [out, out]
kernel :
func : rsqrt_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : rsqrt_grad
forward : rsqrt (Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [out]
kernel :
func : rsqrt_grad
backward : rsqrt_double_grad
inplace : (out_grad -> x_grad)
- backward_op : send_uv_grad - backward_op : send_uv_grad
forward : send_uv (Tensor x, Tensor y, Tensor src_index, Tensor dst_index, str message_op = "ADD") -> Tensor(out) forward : send_uv (Tensor x, Tensor y, Tensor src_index, Tensor dst_index, str message_op = "ADD") -> Tensor(out)
args: (Tensor x, Tensor y, Tensor src_index, Tensor dst_index, Tensor out_grad, str message_op = "ADD") args: (Tensor x, Tensor y, Tensor src_index, Tensor dst_index, Tensor out_grad, str message_op = "ADD")
...@@ -467,6 +605,42 @@ ...@@ -467,6 +605,42 @@
func : send_uv_grad func : send_uv_grad
data_type : x data_type : x
- backward_op : sigmoid_double_grad
forward : sigmoid_grad (Tensor out, Tensor fwd_grad_out) -> Tensor(grad_x)
args : (Tensor out, Tensor fwd_grad_out, Tensor grad_x_grad)
output : Tensor(out_grad), Tensor(fwd_grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [out, fwd_grad_out]
kernel :
func : sigmoid_double_grad
backward : sigmoid_triple_grad
inplace : (grad_x_grad -> fwd_grad_out_grad)
- backward_op : sigmoid_grad
forward : sigmoid (Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [out]
kernel :
func : sigmoid_grad
backward : sigmoid_double_grad
inplace : (out_grad -> x_grad)
- backward_op : sigmoid_triple_grad
forward : sigmoid_double_grad (Tensor out, Tensor fwd_grad_out, Tensor grad_grad_x) -> Tensor(grad_out), Tensor(grad_grad_out)
args : (Tensor out, Tensor fwd_grad_out, Tensor grad_grad_x, Tensor grad_out_grad, Tensor grad_grad_out_grad)
output : Tensor(out_grad), Tensor(fwd_grad_out_grad), Tensor(grad_grad_x_grad)
infer_meta :
func : GeneralTernaryGradInferMeta
param : [out, fwd_grad_out, grad_grad_x]
kernel :
func : sigmoid_triple_grad
optional : grad_grad_out_grad
inplace : (grad_grad_x -> fwd_grad_out_grad)
- backward_op : silu_grad - backward_op : silu_grad
forward : silu (Tensor x) -> Tensor(out) forward : silu (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad) args : (Tensor x, Tensor out_grad)
...@@ -512,6 +686,39 @@ ...@@ -512,6 +686,39 @@
func : sinh_grad func : sinh_grad
inplace : (out_grad -> x_grad) inplace : (out_grad -> x_grad)
- backward_op : softplus_grad
forward : softplus (Tensor x, float beta, float threshold) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float beta, float threshold)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : softplus_grad
inplace : (out_grad -> x_grad)
- backward_op : softshrink_grad
forward : softshrink (Tensor x, float threshold) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float threshold)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : softshrink_grad
inplace : (out_grad -> x_grad)
- backward_op : softsign_grad
forward : softsign (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : softsign_grad
inplace : (out_grad -> x_grad)
- backward_op : solve_grad - backward_op : solve_grad
forward : solve (Tensor x, Tensor y) -> Tensor(out) forward : solve (Tensor x, Tensor y) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor out, Tensor out_grad) args : (Tensor x, Tensor y, Tensor out, Tensor out_grad)
...@@ -522,6 +729,52 @@ ...@@ -522,6 +729,52 @@
kernel : kernel :
func : solve_grad func : solve_grad
- backward_op : sqrt_double_grad
forward : sqrt_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor out, Tensor grad_x, Tensor grad_x_grad)
output : Tensor(out_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [out, out]
kernel :
func : sqrt_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : sqrt_grad
forward : sqrt (Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [out]
kernel :
func : sqrt_grad
backward : sqrt_double_grad
inplace : (out_grad -> x_grad)
- backward_op : square_double_grad
forward : square_grad (Tensor x, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor x, Tensor grad_out, Tensor grad_x_grad)
output : Tensor(x_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [x, x]
kernel :
func : square_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : square_grad
forward : square (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : square_grad
backward : square_double_grad
inplace : (out_grad -> x_grad)
- backward_op : tan_grad - backward_op : tan_grad
forward : tan (Tensor x) -> Tensor(out) forward : tan (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad) args : (Tensor x, Tensor out_grad)
...@@ -557,6 +810,17 @@ ...@@ -557,6 +810,17 @@
backward : tanh_double_grad backward : tanh_double_grad
inplace : (out_grad -> x_grad) inplace : (out_grad -> x_grad)
- backward_op : tanh_shrink_grad
forward : tanh_shrink (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : tanh_shrink_grad
inplace : (out_grad -> x_grad)
- backward_op : tanh_triple_grad - backward_op : tanh_triple_grad
forward : tanh_double_grad (Tensor out, Tensor grad_out_forward, Tensor grad_x_grad_forward) -> Tensor(grad_out_new), Tensor(grad_out_grad) forward : tanh_double_grad (Tensor out, Tensor grad_out_forward, Tensor grad_x_grad_forward) -> Tensor(grad_out_new), Tensor(grad_out_grad)
args : (Tensor out, Tensor grad_out_forward, Tensor grad_x_grad_forward, Tensor grad_out_new_grad, Tensor grad_out_grad_grad) args : (Tensor out, Tensor grad_out_forward, Tensor grad_x_grad_forward, Tensor grad_out_new_grad, Tensor grad_out_grad_grad)
...@@ -568,6 +832,17 @@ ...@@ -568,6 +832,17 @@
func : tanh_triple_grad func : tanh_triple_grad
inplace : (grad_x_grad_forward -> grad_out_forward_grad) inplace : (grad_x_grad_forward -> grad_out_forward_grad)
- backward_op : thresholded_relu_grad
forward : thresholded_relu (Tensor x, float threshold) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float threshold)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : thresholded_relu_grad
inplace : (out_grad -> x_grad)
- backward_op : trace_grad - backward_op : trace_grad
forward : trace (Tensor x, int offset, int axis1, int axis2) -> Tensor(out) forward : trace (Tensor x, int offset, int axis1, int axis2) -> Tensor(out)
args : (Tensor x, Tensor out_grad, int offset, int axis1, int axis2) args : (Tensor x, Tensor out_grad, int offset, int axis1, int axis2)
......
...@@ -75,6 +75,11 @@ def replace_compat_name(api_op_map, forward_api_dict, backward_api_dict): ...@@ -75,6 +75,11 @@ def replace_compat_name(api_op_map, forward_api_dict, backward_api_dict):
else: else:
return names[0].strip(), names[1].split(')')[0].strip() return names[0].strip(), names[1].split(')')[0].strip()
def update_api_attr_name(attrs, attrs_alias_map):
for attr_item in attrs:
if attr_item['name'] in attrs_alias_map:
attr_item['name'] = attrs_alias_map[attr_item['name']]
for api_args in api_op_map: for api_args in api_op_map:
api_name, op_name = get_api_and_op_name(api_args['op']) api_name, op_name = get_api_and_op_name(api_args['op'])
if api_name not in forward_api_dict: if api_name not in forward_api_dict:
...@@ -99,6 +104,13 @@ def replace_compat_name(api_op_map, forward_api_dict, backward_api_dict): ...@@ -99,6 +104,13 @@ def replace_compat_name(api_op_map, forward_api_dict, backward_api_dict):
double_grad_item = backward_api_dict[double_grad_api_name] double_grad_item = backward_api_dict[double_grad_api_name]
backward_api_item['backward'] = double_grad_op_name backward_api_item['backward'] = double_grad_op_name
double_grad_item['op_name'] = double_grad_op_name double_grad_item['op_name'] = double_grad_op_name
if 'attrs' in api_args:
update_api_attr_name(
double_grad_item['attrs'], api_args['attrs']
)
update_api_attr_name(
double_grad_item['forward']['attrs'], api_args['attrs']
)
# for triple grad # for triple grad
if len(backward_op_list) > 2: if len(backward_op_list) > 2:
...@@ -109,6 +121,14 @@ def replace_compat_name(api_op_map, forward_api_dict, backward_api_dict): ...@@ -109,6 +121,14 @@ def replace_compat_name(api_op_map, forward_api_dict, backward_api_dict):
triple_grad_item = backward_api_dict[triple_grad_api_name] triple_grad_item = backward_api_dict[triple_grad_api_name]
double_grad_item['backward'] = triple_grad_op_name double_grad_item['backward'] = triple_grad_op_name
triple_grad_item['op_name'] = triple_grad_op_name triple_grad_item['op_name'] = triple_grad_op_name
if 'attrs' in api_args:
update_api_attr_name(
triple_grad_item['attrs'], api_args['attrs']
)
update_api_attr_name(
triple_grad_item['forward']['attrs'],
api_args['attrs'],
)
key_set = ['inputs', 'attrs', 'outputs'] key_set = ['inputs', 'attrs', 'outputs']
args_map = {} args_map = {}
......
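The update_api_attr_name helper added above propagates the attribute aliases declared in op_compat.yaml to the double-grad and triple-grad entries (and to the forward signatures recorded inside them), so those entries end up with the legacy attribute names just like the top-level op does. A minimal, self-contained sketch of that behaviour follows; the dictionaries are hand-written stand-ins for the parsed YAML entries, not the generator's real data structures.

# Stand-alone sketch of the attribute-alias renaming shown in the diff above.
# The dictionaries are illustrative stand-ins for the parsed YAML entries.
def update_api_attr_name(attrs, attrs_alias_map):
    for attr_item in attrs:
        if attr_item['name'] in attrs_alias_map:
            attr_item['name'] = attrs_alias_map[attr_item['name']]

# op_compat.yaml declares for leaky_relu:   attrs: { negative_slope : alpha }
alias_map = {'negative_slope': 'alpha'}

double_grad_item = {
    'attrs': [{'name': 'negative_slope', 'typename': 'float'}],
    'forward': {'attrs': [{'name': 'negative_slope', 'typename': 'float'}]},
}

update_api_attr_name(double_grad_item['attrs'], alias_map)
update_api_attr_name(double_grad_item['forward']['attrs'], alias_map)
print(double_grad_item['attrs'][0]['name'])             # -> alpha
print(double_grad_item['forward']['attrs'][0]['name'])  # -> alpha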
...@@ -459,7 +459,7 @@ class {{name | to_pascal_case}}OpMaker : public framework::SingleGradOpMaker<T> ...@@ -459,7 +459,7 @@ class {{name | to_pascal_case}}OpMaker : public framework::SingleGradOpMaker<T>
Input({{name_in_forward_orig | to_opmaker_name}}) Input({{name_in_forward_orig | to_opmaker_name}})
{%- elif name in output_names %} {%- elif name in output_names %}
{% set name_in_forward_orig = output_orig_names[output_names.index(name)]%} {% set name_in_forward_orig = output_orig_names[output_names.index(name)]%}
Output({{name | to_opmaker_name}}) Output({{name_in_forward_orig | to_opmaker_name}})
{%- elif name.endswith("_grad") %}{# output grad#} {%- elif name.endswith("_grad") %}{# output grad#}
{% set name_in_forward = name[:-5] %} {% set name_in_forward = name[:-5] %}
{% if name_in_forward in output_names %} {% if name_in_forward in output_names %}
......
...@@ -217,29 +217,6 @@ ...@@ -217,29 +217,6 @@
invoke : cast (out_grad, x.dtype()) invoke : cast (out_grad, x.dtype())
no_need_buffer : x no_need_buffer : x
- backward_op : celu_double_grad
forward : celu_grad(Tensor x, Tensor grad_out, float alpha) -> Tensor(grad_x)
args : (Tensor x, Tensor grad_out, Tensor grad_x_grad, float alpha)
output : Tensor(x_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [x, x]
kernel :
func : celu_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : celu_grad
forward : celu(Tensor x, float alpha) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float alpha)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param: [x]
kernel :
func : celu_grad
backward : celu_double_grad
inplace : (out_grad -> x_grad)
- backward_op : clip_double_grad - backward_op : clip_double_grad
forward : clip_grad (Tensor x, Tensor grad_out, Scalar min = 0., Scalar max = 0.) -> Tensor(grad_x) forward : clip_grad (Tensor x, Tensor grad_out, Scalar min = 0., Scalar max = 0.) -> Tensor(grad_x)
args : (Tensor x, Tensor grad_x_grad, Scalar min = 0., Scalar max = 0.) args : (Tensor x, Tensor grad_x_grad, Scalar min = 0., Scalar max = 0.)
...@@ -552,29 +529,6 @@ ...@@ -552,29 +529,6 @@
kernel : kernel :
func : elementwise_pow_grad func : elementwise_pow_grad
- backward_op : elu_double_grad
forward : elu_grad (Tensor x, Tensor out, Tensor grad_out, float alpha)-> Tensor(grad_x)
args : (Tensor x, Tensor grad_out, Tensor grad_x_grad, float alpha)
output : Tensor(x_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [x, x]
kernel :
func : elu_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : elu_grad
forward : elu (Tensor x, float alpha) -> Tensor(out)
args : (Tensor x, Tensor out, Tensor out_grad, float alpha)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : elu_grad
backward : elu_double_grad
inplace : (out_grad -> x_grad)
- backward_op : embedding_grad - backward_op : embedding_grad
forward : embedding (Tensor x, Tensor weight, int64_t padding_idx=-1, bool sparse=false) -> Tensor(out) forward : embedding (Tensor x, Tensor weight, int64_t padding_idx=-1, bool sparse=false) -> Tensor(out)
args : (Tensor x, Tensor weight, Tensor out_grad, int64_t padding_idx=-1, bool sparse=false) args : (Tensor x, Tensor weight, Tensor out_grad, int64_t padding_idx=-1, bool sparse=false)
...@@ -940,29 +894,6 @@ ...@@ -940,29 +894,6 @@
no_need_buffer : bias no_need_buffer : bias
optional : scale, bias optional : scale, bias
- backward_op : leaky_relu_double_grad
forward : leaky_relu_grad (Tensor x, Tensor grad_out, float negative_slope) -> Tensor(grad_x)
args : (Tensor x, Tensor grad_x_grad, float negative_slope)
output : Tensor(grad_out_grad)
infer_meta :
func : UnchangedInferMeta
param : [grad_x_grad]
kernel :
func : leaky_relu_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : leaky_relu_grad
forward : leaky_relu (Tensor x, float negative_slope) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float negative_slope)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : leaky_relu_grad
backward : leaky_relu_double_grad
inplace : (out_grad -> x_grad)
- backward_op : lerp_grad - backward_op : lerp_grad
forward : lerp (Tensor x, Tensor y, Tensor weight) -> Tensor(out) forward : lerp (Tensor x, Tensor y, Tensor weight) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor weight, Tensor out, Tensor out_grad) args : (Tensor x, Tensor y, Tensor weight, Tensor out, Tensor out_grad)
...@@ -985,29 +916,6 @@ ...@@ -985,29 +916,6 @@
func : linear_interp_grad func : linear_interp_grad
data_type : output_grad data_type : output_grad
- backward_op : log_double_grad
forward : log_grad (Tensor x, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor x, Tensor grad_out, Tensor grad_x_grad)
output : Tensor(x_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [x, x]
kernel :
func : log_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : log_grad
forward : log (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : log_grad
backward : log_double_grad
inplace : (out_grad -> x_grad)
- backward_op : log_loss_grad - backward_op : log_loss_grad
forward : log_loss (Tensor input, Tensor label, float epsilon) -> Tensor(out) forward : log_loss (Tensor input, Tensor label, float epsilon) -> Tensor(out)
args : (Tensor input, Tensor label, Tensor out_grad, float epsilon) args : (Tensor input, Tensor label, Tensor out_grad, float epsilon)
...@@ -1537,29 +1445,6 @@ ...@@ -1537,29 +1445,6 @@
func : relu6_grad func : relu6_grad
inplace : (out_grad -> x_grad) inplace : (out_grad -> x_grad)
- backward_op : relu_double_grad
forward : relu_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor out, Tensor grad_x_grad)
output : Tensor(grad_out_grad)
infer_meta :
func : UnchangedInferMeta
param : [out]
kernel :
func : relu_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : relu_grad
forward : relu (Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [out]
kernel :
func : relu_grad
backward: relu_double_grad
inplace : (out_grad -> x_grad)
- backward_op : renorm_grad - backward_op : renorm_grad
forward : renorm (Tensor x, float p, int axis, float max_norm) -> Tensor(out) forward : renorm (Tensor x, float p, int axis, float max_norm) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float p, int axis, float max_norm) args : (Tensor x, Tensor out_grad, float p, int axis, float max_norm)
...@@ -1683,29 +1568,6 @@ ...@@ -1683,29 +1568,6 @@
data_type : x data_type : x
no_need_buffer : x no_need_buffer : x
- backward_op : rsqrt_double_grad
forward : rsqrt_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor out, Tensor grad_x, Tensor grad_x_grad)
output : Tensor(out_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [out, out]
kernel :
func : rsqrt_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : rsqrt_grad
forward : rsqrt (Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [out]
kernel :
func : rsqrt_grad
backward : rsqrt_double_grad
inplace : (out_grad -> x_grad)
- backward_op : scale_grad - backward_op : scale_grad
forward : scale (Tensor x, Scalar scale, float bias, bool bias_after_scale) -> Tensor(out) forward : scale (Tensor x, Scalar scale, float bias, bool bias_after_scale) -> Tensor(out)
args : (Tensor out_grad, Scalar scale=1.0, bool bias_after_scale=true) args : (Tensor out_grad, Scalar scale=1.0, bool bias_after_scale=true)
...@@ -1791,42 +1653,6 @@ ...@@ -1791,42 +1653,6 @@
func : sigmoid_cross_entropy_with_logits_grad func : sigmoid_cross_entropy_with_logits_grad
inplace : (out_grad -> x_grad) inplace : (out_grad -> x_grad)
- backward_op : sigmoid_double_grad
forward : sigmoid_grad (Tensor out, Tensor fwd_grad_out) -> Tensor(grad_x)
args : (Tensor out, Tensor fwd_grad_out, Tensor grad_x_grad)
output : Tensor(out_grad), Tensor(fwd_grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [out, fwd_grad_out]
kernel :
func : sigmoid_double_grad
backward : sigmoid_triple_grad
inplace : (grad_x_grad -> fwd_grad_out_grad)
- backward_op : sigmoid_grad
forward : sigmoid (Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [out]
kernel :
func : sigmoid_grad
backward : sigmoid_double_grad
inplace : (out_grad -> x_grad)
- backward_op : sigmoid_triple_grad
forward : sigmoid_double_grad (Tensor out, Tensor fwd_grad_out, Tensor grad_grad_x) -> Tensor(grad_out), Tensor(grad_grad_out)
args : (Tensor out, Tensor fwd_grad_out, Tensor grad_grad_x, Tensor grad_out_grad, Tensor grad_grad_out_grad)
output : Tensor(out_grad), Tensor(fwd_grad_out_grad), Tensor(grad_grad_x_grad)
infer_meta :
func : GeneralTernaryGradInferMeta
param : [out, fwd_grad_out, grad_grad_x]
kernel :
func : sigmoid_triple_grad
optional : grad_grad_out_grad
inplace : (grad_grad_x -> fwd_grad_out_grad)
- backward_op : sign_grad - backward_op : sign_grad
forward : sign (Tensor x) -> Tensor(out) forward : sign (Tensor x) -> Tensor(out)
args : (Tensor out_grad) args : (Tensor out_grad)
...@@ -1872,39 +1698,6 @@ ...@@ -1872,39 +1698,6 @@
func : softmax_grad func : softmax_grad
use_gpudnn : true use_gpudnn : true
- backward_op : softplus_grad
forward : softplus (Tensor x, float beta, float threshold) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float beta, float threshold)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : softplus_grad
inplace : (out_grad -> x_grad)
- backward_op : softshrink_grad
forward : softshrink (Tensor x, float threshold) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float threshold)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : soft_shrink_grad
inplace : (out_grad -> x_grad)
- backward_op : softsign_grad
forward : softsign (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : softsign_grad
inplace : (out_grad -> x_grad)
- backward_op : spectral_norm_grad - backward_op : spectral_norm_grad
forward : spectral_norm (Tensor weight, Tensor u, Tensor v, int dim, int power_iters, float eps) -> Tensor(out) forward : spectral_norm (Tensor weight, Tensor u, Tensor v, int dim, int power_iters, float eps) -> Tensor(out)
args : (Tensor weight, Tensor u, Tensor v, Tensor out_grad, int dim, int power_iters, float eps) args : (Tensor weight, Tensor u, Tensor v, Tensor out_grad, int dim, int power_iters, float eps)
...@@ -1927,52 +1720,6 @@ ...@@ -1927,52 +1720,6 @@
output : Tensor(x_grad) output : Tensor(x_grad)
invoke : concat( out_grad, axis) invoke : concat( out_grad, axis)
- backward_op : sqrt_double_grad
forward : sqrt_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor out, Tensor grad_x, Tensor grad_x_grad)
output : Tensor(out_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [out, out]
kernel :
func : sqrt_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : sqrt_grad
forward : sqrt (Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [out]
kernel :
func : sqrt_grad
backward : sqrt_double_grad
inplace : (out_grad -> x_grad)
- backward_op : square_double_grad
forward : square_grad (Tensor x, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor x, Tensor grad_out, Tensor grad_x_grad)
output : Tensor(x_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [x, x]
kernel :
func : square_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : square_grad
forward : square (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : square_grad
backward : square_double_grad
inplace : (out_grad -> x_grad)
- backward_op : squared_l2_norm_grad - backward_op : squared_l2_norm_grad
forward : squared_l2_norm(Tensor x) -> Tensor(out) forward : squared_l2_norm(Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad) args : (Tensor x, Tensor out_grad)
...@@ -2112,17 +1859,6 @@ ...@@ -2112,17 +1859,6 @@
kernel : kernel :
func : take_along_axis_grad func : take_along_axis_grad
- backward_op : tanh_shrink_grad
forward : tanh_shrink (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : tanh_shrink_grad
inplace : (out_grad -> x_grad)
- backward_op : temporal_shift_grad - backward_op : temporal_shift_grad
forward : temporal_shift(Tensor x, int seg_num, float shift_ratio, str data_format_str) -> Tensor(out) forward : temporal_shift(Tensor x, int seg_num, float shift_ratio, str data_format_str) -> Tensor(out)
args : (Tensor out_grad, int seg_num, float shift_ratio, str data_format_str) args : (Tensor out_grad, int seg_num, float shift_ratio, str data_format_str)
...@@ -2133,17 +1869,6 @@ ...@@ -2133,17 +1869,6 @@
kernel : kernel :
func : temporal_shift_grad func : temporal_shift_grad
- backward_op : thresholded_relu_grad
forward : thresholded_relu (Tensor x, float threshold) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float threshold)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : thresholded_relu_grad
inplace : (out_grad -> x_grad)
- backward_op : tile_double_grad - backward_op : tile_double_grad
forward : tile_grad (Tensor x, Tensor grad_out, IntArray repeat_times) -> Tensor(grad_x) forward : tile_grad (Tensor x, Tensor grad_out, IntArray repeat_times) -> Tensor(grad_x)
args : (Tensor grad_x_grad, IntArray repeat_times) args : (Tensor grad_x_grad, IntArray repeat_times)
......
...@@ -357,16 +357,6 @@ ...@@ -357,16 +357,6 @@
data_type : x data_type : x
backward : cast_grad backward : cast_grad
- op : celu
args : (Tensor x, float alpha)
output : Tensor(out)
infer_meta :
func : UnchangedInferMeta
param: [x]
kernel :
func : celu
backward : celu_grad
- op : check_finite_and_unscale_ - op : check_finite_and_unscale_
args : (Tensor[] x, Tensor scale, Tensor input_found_infinite) args : (Tensor[] x, Tensor scale, Tensor input_found_infinite)
output : Tensor[](out){x.size()}, Tensor(output_found_infinite) output : Tensor[](out){x.size()}, Tensor(output_found_infinite)
...@@ -664,17 +654,6 @@ ...@@ -664,17 +654,6 @@
func : elementwise_pow func : elementwise_pow
backward : elementwise_pow_grad backward : elementwise_pow_grad
- op : elu
args : (Tensor x, float alpha)
output : Tensor(out)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : elu
inplace : (x -> out)
backward : elu_grad
- op : embedding - op : embedding
args : (Tensor x, Tensor weight, int64_t padding_idx=-1, bool sparse=false) args : (Tensor x, Tensor weight, int64_t padding_idx=-1, bool sparse=false)
output : Tensor output : Tensor
...@@ -1241,16 +1220,6 @@ ...@@ -1241,16 +1220,6 @@
backward : layer_norm_grad backward : layer_norm_grad
optional : scale, bias optional : scale, bias
- op : leaky_relu
args : (Tensor x, float negative_slope)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : leaky_relu
backward : leaky_relu_grad
- op : lerp - op : lerp
args : (Tensor x, Tensor y, Tensor weight) args : (Tensor x, Tensor y, Tensor weight)
output : Tensor(out) output : Tensor(out)
...@@ -1300,15 +1269,6 @@ ...@@ -1300,15 +1269,6 @@
data_type : dtype data_type : dtype
backend : place backend : place
- op : log
args : (Tensor x)
output : Tensor
infer_meta :
func : UnchangedInferMeta
kernel :
func : log
backward: log_grad
- op : log_loss - op : log_loss
args : (Tensor input, Tensor label, float epsilon) args : (Tensor input, Tensor label, float epsilon)
output : Tensor output : Tensor
...@@ -1910,16 +1870,6 @@ ...@@ -1910,16 +1870,6 @@
func : real func : real
backward : real_grad backward : real_grad
- op : relu
args : (Tensor x)
output : Tensor(out)
infer_meta :
func : UnchangedInferMeta
kernel :
func : relu
inplace : (x -> out)
backward : relu_grad
- op : relu6 - op : relu6
args : (Tensor x, float threshold) args : (Tensor x, float threshold)
output : Tensor output : Tensor
...@@ -2032,16 +1982,6 @@ ...@@ -2032,16 +1982,6 @@
func : roll func : roll
backward : roll_grad backward : roll_grad
- op : rsqrt
args : (Tensor x)
output : Tensor(out)
infer_meta :
func : UnchangedInferMeta
kernel :
func : rsqrt
inplace : (x -> out)
backward : rsqrt_grad
- op : scale - op : scale
args : (Tensor x, Scalar scale, float bias, bool bias_after_scale) args : (Tensor x, Scalar scale, float bias, bool bias_after_scale)
output : Tensor(out) output : Tensor(out)
...@@ -2160,15 +2100,6 @@ ...@@ -2160,15 +2100,6 @@
kernel : kernel :
func : shard_index func : shard_index
- op : sigmoid
args : (Tensor x)
output : Tensor
infer_meta :
func : UnchangedInferMeta
kernel :
func : sigmoid
backward : sigmoid_grad
- op : sigmoid_cross_entropy_with_logits - op : sigmoid_cross_entropy_with_logits
args : (Tensor x, Tensor label, bool normalize, int ignore_index) args : (Tensor x, Tensor label, bool normalize, int ignore_index)
output : Tensor output : Tensor
...@@ -2216,36 +2147,6 @@ ...@@ -2216,36 +2147,6 @@
inplace : (x -> out) inplace : (x -> out)
backward : softmax_grad backward : softmax_grad
- op : softplus
args : (Tensor x, float beta, float threshold)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : softplus
backward : softplus_grad
- op : softshrink
args : (Tensor x, float threshold)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : soft_shrink
backward : softshrink_grad
- op : softsign
args : (Tensor x)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : softsign
backward : softsign_grad
- op : spectral_norm - op : spectral_norm
args : (Tensor weight, Tensor u, Tensor v, int dim, int power_iters, float eps) args : (Tensor weight, Tensor u, Tensor v, int dim, int power_iters, float eps)
output : Tensor output : Tensor
...@@ -2274,25 +2175,6 @@ ...@@ -2274,25 +2175,6 @@
func : split_with_num func : split_with_num
backward : split_with_num_grad backward : split_with_num_grad
- op : sqrt
args : (Tensor x)
output : Tensor(out)
infer_meta :
func : UnchangedInferMeta
kernel :
func : sqrt
inplace : (x -> out)
backward : sqrt_grad
- op : square
args : (Tensor x)
output : Tensor
infer_meta :
func : UnchangedInferMeta
kernel :
func : square
backward : square_grad
- op : squared_l2_norm - op : squared_l2_norm
args : (Tensor x) args : (Tensor x)
output : Tensor output : Tensor
...@@ -2394,15 +2276,6 @@ ...@@ -2394,15 +2276,6 @@
data_type : arr data_type : arr
backward : take_along_axis_grad backward : take_along_axis_grad
- op : tanh_shrink
args : (Tensor x)
output : Tensor
infer_meta :
func : UnchangedInferMeta
kernel :
func : tanh_shrink
backward : tanh_shrink_grad
- op : temporal_shift - op : temporal_shift
args : (Tensor x, int seg_num, float shift_ratio, str data_format_str) args : (Tensor x, int seg_num, float shift_ratio, str data_format_str)
output : Tensor output : Tensor
...@@ -2412,16 +2285,6 @@ ...@@ -2412,16 +2285,6 @@
func : temporal_shift func : temporal_shift
backward : temporal_shift_grad backward : temporal_shift_grad
- op : thresholded_relu
args : (Tensor x, float threshold)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : thresholded_relu
backward : thresholded_relu_grad
- op : tile - op : tile
args : (Tensor x, IntArray repeat_times) args : (Tensor x, IntArray repeat_times)
output : Tensor output : Tensor
......
...@@ -131,6 +131,13 @@ ...@@ -131,6 +131,13 @@
extra : extra :
attrs : [bool use_mkldnn = false, bool use_cudnn = false] attrs : [bool use_mkldnn = false, bool use_cudnn = false]
- op : celu
backward : celu_grad, celu_double_grad(celu_grad_grad)
inputs :
x : X
outputs :
out : Out
- op : cholesky - op : cholesky
inputs : inputs :
x : X x : X
...@@ -316,7 +323,11 @@ ...@@ -316,7 +323,11 @@
bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f]
- op : elu - op : elu
backward : elu_grad backward : elu_grad, elu_double_grad (elu_grad_grad)
inputs :
x : X
outputs :
out : Out
extra : extra :
attrs : [bool use_mkldnn = false] attrs : [bool use_mkldnn = false]
...@@ -504,7 +515,13 @@ ...@@ -504,7 +515,13 @@
attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32", bool is_test = false] attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32", bool is_test = false]
- op : leaky_relu - op : leaky_relu
backward : leaky_relu_grad backward : leaky_relu_grad, leaky_relu_double_grad (leaky_relu_grad_grad)
inputs :
x : X
outputs :
out : Out
attrs:
negative_slope : alpha
extra : extra :
attrs : [bool use_mkldnn = false] attrs : [bool use_mkldnn = false]
...@@ -520,7 +537,11 @@ ...@@ -520,7 +537,11 @@
attrs : [bool use_mkldnn = false] attrs : [bool use_mkldnn = false]
- op : log - op : log
backward : log_grad backward : log_grad, log_double_grad (log_grad_grad)
inputs :
x : X
outputs :
out : Out
extra : extra :
attrs : [bool use_mkldnn = false, bool use_cudnn = false] attrs : [bool use_mkldnn = false, bool use_cudnn = false]
...@@ -725,7 +746,11 @@ ...@@ -725,7 +746,11 @@
attrs : [bool use_mkldnn = false] attrs : [bool use_mkldnn = false]
- op : relu - op : relu
backward : relu_grad backward : relu_grad, relu_double_grad (relu_grad_grad)
inputs :
x : X
outputs :
out : Out
extra : extra :
attrs : [bool use_mkldnn = false, bool use_cudnn = false] attrs : [bool use_mkldnn = false, bool use_cudnn = false]
...@@ -754,7 +779,11 @@ ...@@ -754,7 +779,11 @@
attrs : [bool use_mkldnn = false, bool use_cudnn = false] attrs : [bool use_mkldnn = false, bool use_cudnn = false]
- op : rsqrt - op : rsqrt
backward : rsqrt_grad backward : rsqrt_grad, rsqrt_double_grad (rsqrt_grad_grad)
inputs :
x : X
outputs :
out : Out
extra : extra :
attrs : [bool use_mkldnn = false, bool use_cudnn = false] attrs : [bool use_mkldnn = false, bool use_cudnn = false]
...@@ -784,7 +813,11 @@ ...@@ -784,7 +813,11 @@
attrs : [bool use_mkldnn = false] attrs : [bool use_mkldnn = false]
- op : sigmoid - op : sigmoid
backward : sigmoid_grad backward : sigmoid_grad, sigmoid_double_grad (sigmoid_grad_grad), sigmoid_triple_grad
inputs :
x : X
outputs :
out : Out
extra : extra :
attrs : [bool use_mkldnn = false, bool use_cudnn = false] attrs : [bool use_mkldnn = false, bool use_cudnn = false]
...@@ -827,12 +860,29 @@ ...@@ -827,12 +860,29 @@
- op : softplus - op : softplus
backward : softplus_grad backward : softplus_grad
inputs :
x : X
outputs :
out : Out
extra : extra :
attrs : [bool use_mkldnn = false, bool use_cudnn = false, str fuse_activation_type = "", float fuse_activation_alpha = 0.0f, attrs : [bool use_mkldnn = false, bool use_cudnn = false, str fuse_activation_type = "", float fuse_activation_alpha = 0.0f,
float fuse_activation_beta = 0.0f, float fuse_activation_scale = 1.0f] float fuse_activation_beta = 0.0f, float fuse_activation_scale = 1.0f]
- op : softshrink
backward : softshrink_grad
inputs :
x : X
outputs :
out : Out
attrs :
threshold : lambda
- op : softsign - op : softsign
backward : softsign_grad backward : softsign_grad
inputs :
x : X
outputs :
out : Out
extra : extra :
attrs : [bool use_mkldnn = false, bool use_cudnn = false] attrs : [bool use_mkldnn = false, bool use_cudnn = false]
...@@ -843,12 +893,20 @@ ...@@ -843,12 +893,20 @@
out : Out out : Out
- op : sqrt - op : sqrt
backward : sqrt_grad backward : sqrt_grad, sqrt_double_grad (sqrt_grad_grad)
inputs :
x : X
outputs :
out : Out
extra : extra :
attrs : [bool use_mkldnn = false, bool use_cudnn = false] attrs : [bool use_mkldnn = false, bool use_cudnn = false]
- op : square - op : square
backward : square_grad backward : square_grad, square_double_grad (square_grad_grad)
inputs :
x : X
outputs :
out : Out
extra : extra :
attrs : [bool use_mkldnn = false, bool use_cudnn = false] attrs : [bool use_mkldnn = false, bool use_cudnn = false]
...@@ -903,9 +961,19 @@ ...@@ -903,9 +961,19 @@
- op : tanh_shrink - op : tanh_shrink
backward : tanh_shrink_grad backward : tanh_shrink_grad
inputs :
x : X
outputs :
out : Out
extra : extra :
attrs : [bool use_mkldnn = false, bool use_cudnn = false] attrs : [bool use_mkldnn = false, bool use_cudnn = false]
- op : thresholded_relu
inputs :
x : X
outputs :
out : Out
- op : trace - op : trace
inputs : inputs :
x : Input x : Input
......
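The op_compat.yaml entries added here map the Phi-style names used in the generated YAML definitions to the legacy fluid operator interface: inputs/outputs such as x : X and out : Out, attribute aliases such as softshrink's threshold : lambda and leaky_relu's negative_slope : alpha, and a backward field that lists the grad-op chain with the legacy double-grad name in parentheses, e.g. relu_grad, relu_double_grad(relu_grad_grad). This replaces the hand-registered PD_REGISTER_BASE_KERNEL_NAME aliases removed later in this diff. The snippet below is an illustrative way such a backward string could be split into (new name, legacy alias) pairs; it mirrors the intent of the name parsing in generate_op.py but is not the generator's actual code.

# Illustrative parsing of an op_compat.yaml "backward" field such as
#   relu_grad, relu_double_grad(relu_grad_grad)
# into (new_name, legacy_alias) pairs. Not the generator's actual code.
def parse_backward_chain(backward_field):
    pairs = []
    for item in backward_field.split(','):
        item = item.strip()
        if '(' in item:
            new_name, legacy = item.split('(')
            pairs.append((new_name.strip(), legacy.split(')')[0].strip()))
        else:
            pairs.append((item, item))
    return pairs

print(parse_backward_chain("relu_grad, relu_double_grad(relu_grad_grad)"))
# -> [('relu_grad', 'relu_grad'), ('relu_double_grad', 'relu_grad_grad')]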
...@@ -106,6 +106,16 @@ ...@@ -106,6 +106,16 @@
inplace : (x -> out) inplace : (x -> out)
backward : ceil_grad backward : ceil_grad
- op : celu
args : (Tensor x, float alpha = 1.0)
output : Tensor(out)
infer_meta :
func : UnchangedInferMeta
param: [x]
kernel :
func : celu
backward : celu_grad
- op : cholesky - op : cholesky
args : (Tensor x, bool upper=false) args : (Tensor x, bool upper=false)
output : Tensor output : Tensor
...@@ -207,6 +217,17 @@ ...@@ -207,6 +217,17 @@
data_type : x data_type : x
backward : dot_grad backward : dot_grad
- op : elu
args : (Tensor x, float alpha = 1.0f)
output : Tensor(out)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : elu
inplace : (x -> out)
backward : elu_grad
- op : erf - op : erf
args : (Tensor x) args : (Tensor x)
output : Tensor output : Tensor
...@@ -312,6 +333,16 @@ ...@@ -312,6 +333,16 @@
func : hard_sigmoid func : hard_sigmoid
backward : hardsigmoid_grad backward : hardsigmoid_grad
- op : leaky_relu
args : (Tensor x, float negative_slope = 0.02f)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : leaky_relu
backward : leaky_relu_grad
- op : lgamma - op : lgamma
args : (Tensor x) args : (Tensor x)
output : Tensor(out) output : Tensor(out)
...@@ -321,6 +352,15 @@ ...@@ -321,6 +352,15 @@
func : lgamma func : lgamma
backward : lgamma_grad backward : lgamma_grad
- op : log
args : (Tensor x)
output : Tensor
infer_meta :
func : UnchangedInferMeta
kernel :
func : log
backward: log_grad
- op : log10 - op : log10
args : (Tensor x) args : (Tensor x)
output : Tensor output : Tensor
...@@ -395,6 +435,16 @@ ...@@ -395,6 +435,16 @@
inplace : (x -> out) inplace : (x -> out)
backward : reciprocal_grad backward : reciprocal_grad
- op : relu
args : (Tensor x)
output : Tensor(out)
infer_meta :
func : UnchangedInferMeta
kernel :
func : relu
inplace : (x -> out)
backward : relu_grad
- op : round - op : round
args : (Tensor x) args : (Tensor x)
output : Tensor(out) output : Tensor(out)
...@@ -405,6 +455,16 @@ ...@@ -405,6 +455,16 @@
inplace : (x -> out) inplace : (x -> out)
backward : round_grad backward : round_grad
- op : rsqrt
args : (Tensor x)
output : Tensor(out)
infer_meta :
func : UnchangedInferMeta
kernel :
func : rsqrt
inplace : (x -> out)
backward : rsqrt_grad
- op : send_uv - op : send_uv
args : (Tensor x, Tensor y, Tensor src_index, Tensor dst_index, str message_op = "ADD") args : (Tensor x, Tensor y, Tensor src_index, Tensor dst_index, str message_op = "ADD")
output : Tensor(out) output : Tensor(out)
...@@ -415,6 +475,15 @@ ...@@ -415,6 +475,15 @@
data_type : x data_type : x
backward : send_uv_grad backward : send_uv_grad
- op : sigmoid
args : (Tensor x)
output : Tensor
infer_meta :
func : UnchangedInferMeta
kernel :
func : sigmoid
backward : sigmoid_grad
- op : silu - op : silu
args : (Tensor x) args : (Tensor x)
output : Tensor output : Tensor
...@@ -442,6 +511,36 @@ ...@@ -442,6 +511,36 @@
func : sinh func : sinh
backward : sinh_grad backward : sinh_grad
- op : softplus
args : (Tensor x, float beta = 1.0, float threshold = 20.0f)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : softplus
backward : softplus_grad
- op : softshrink
args : (Tensor x, float threshold = 0.5)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : softshrink
backward : softshrink_grad
- op : softsign
args : (Tensor x)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : softsign
backward : softsign_grad
- op : solve - op : solve
args : (Tensor x, Tensor y) args : (Tensor x, Tensor y)
output : Tensor output : Tensor
...@@ -452,6 +551,27 @@ ...@@ -452,6 +551,27 @@
data_type : x data_type : x
backward : solve_grad backward : solve_grad
- op : sqrt
args : (Tensor x)
output : Tensor(out)
infer_meta :
func : UnchangedInferMeta
kernel :
func : sqrt {dense -> dense},
sqrt_sr {selected_rows -> selected_rows}
inplace : (x -> out)
backward : sqrt_grad
- op : square
args : (Tensor x)
output : Tensor
infer_meta :
func : UnchangedInferMeta
kernel :
func : square {dense -> dense},
square_sr {selected_rows -> selected_rows}
backward : square_grad
- op : tan - op : tan
args : (Tensor x) args : (Tensor x)
output : Tensor output : Tensor
...@@ -471,6 +591,25 @@ ...@@ -471,6 +591,25 @@
inplace : (x -> out) inplace : (x -> out)
backward : tanh_grad backward : tanh_grad
- op : tanh_shrink
args : (Tensor x)
output : Tensor
infer_meta :
func : UnchangedInferMeta
kernel :
func : tanh_shrink
backward : tanh_shrink_grad
- op : thresholded_relu
args : (Tensor x, float threshold = 1.0)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : thresholded_relu
backward : thresholded_relu_grad
- op : trace - op : trace
args : (Tensor x, int offset = 0, int axis1 = 0, int axis2 = 1) args : (Tensor x, int offset = 0, int axis1 = 0, int axis2 = 1)
output : Tensor output : Tensor
......
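For sqrt and square, the new ops.yaml entries spell out the kernel dispatch that the removed hand-written SqrtActiOpArgumentMapping / SquareActiOpArgumentMapping used to perform in C++: sqrt {dense -> dense}, sqrt_sr {selected_rows -> selected_rows} picks the dense kernel for DenseTensor inputs and the *_sr kernel for SelectedRows inputs. The sketch below only illustrates that selection rule; it is not the argument-mapping code the generator emits.

# Illustrative selection rule for the dense / selected_rows kernel variants
# declared in ops.yaml above; not the generated argument-mapping code.
def select_kernel(base_name: str, input_is_dense_tensor: bool) -> str:
    return base_name if input_is_dense_tensor else base_name + "_sr"

assert select_kernel("sqrt", True) == "sqrt"
assert select_kernel("square", False) == "square_sr"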
...@@ -268,7 +268,7 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(leaky_relu_grad, LeakyReluGradKernel) ...@@ -268,7 +268,7 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(leaky_relu_grad, LeakyReluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(thresholded_relu_grad, PD_REGISTER_ACTIVATION_GRAD_KERNEL(thresholded_relu_grad,
ThresholdedReluGradKernel) ThresholdedReluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(relu6_grad, Relu6GradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(relu6_grad, Relu6GradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(soft_shrink_grad, SoftShrinkGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(softshrink_grad, SoftShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_shrink_grad, HardShrinkGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_shrink_grad, HardShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(tanh_shrink_grad, TanhShrinkGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(tanh_shrink_grad, TanhShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(elu_grad, EluGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(elu_grad, EluGradKernel)
......
...@@ -151,7 +151,7 @@ PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel) ...@@ -151,7 +151,7 @@ PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(thresholded_relu, ThresholdedReluKernel) PD_REGISTER_ACTIVATION_KERNEL(thresholded_relu, ThresholdedReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(relu6, Relu6Kernel) PD_REGISTER_ACTIVATION_KERNEL(relu6, Relu6Kernel)
PD_REGISTER_ACTIVATION_KERNEL(hard_shrink, HardShrinkKernel) PD_REGISTER_ACTIVATION_KERNEL(hard_shrink, HardShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(soft_shrink, SoftShrinkKernel) PD_REGISTER_ACTIVATION_KERNEL(softshrink, SoftShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(tanh_shrink, TanhShrinkKernel) PD_REGISTER_ACTIVATION_KERNEL(tanh_shrink, TanhShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(elu, EluKernel) PD_REGISTER_ACTIVATION_KERNEL(elu, EluKernel)
PD_REGISTER_ACTIVATION_KERNEL(silu, SiluKernel) PD_REGISTER_ACTIVATION_KERNEL(silu, SiluKernel)
......
...@@ -373,7 +373,7 @@ PD_REGISTER_KERNEL(exp_grad, ...@@ -373,7 +373,7 @@ PD_REGISTER_KERNEL(exp_grad,
int64_t, int64_t,
phi::dtype::float16) {} phi::dtype::float16) {}
PD_REGISTER_ACTIVATION_GRAD_KERNEL(soft_shrink_grad, SoftShrinkGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(softshrink_grad, SoftShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_shrink_grad, HardShrinkGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_shrink_grad, HardShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(tanh_shrink_grad, TanhShrinkGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(tanh_shrink_grad, TanhShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(silu_grad, SiluGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(silu_grad, SiluGradKernel)
......
...@@ -242,7 +242,7 @@ PD_REGISTER_KERNEL(square, ...@@ -242,7 +242,7 @@ PD_REGISTER_KERNEL(square,
phi::dtype::bfloat16) {} phi::dtype::bfloat16) {}
PD_REGISTER_ACTIVATION_KERNEL(hard_shrink, HardShrinkKernel) PD_REGISTER_ACTIVATION_KERNEL(hard_shrink, HardShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(soft_shrink, SoftShrinkKernel) PD_REGISTER_ACTIVATION_KERNEL(softshrink, SoftShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(tanh_shrink, TanhShrinkKernel) PD_REGISTER_ACTIVATION_KERNEL(tanh_shrink, TanhShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(elu, EluKernel) PD_REGISTER_ACTIVATION_KERNEL(elu, EluKernel)
PD_REGISTER_ACTIVATION_KERNEL(silu, SiluKernel) PD_REGISTER_ACTIVATION_KERNEL(silu, SiluKernel)
......
...@@ -39,19 +39,8 @@ namespace phi { ...@@ -39,19 +39,8 @@ namespace phi {
#define comma , #define comma ,
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Square, "square", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(HardTanh, "hard_tanh", "t_min" comma "t_max"); DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(HardTanh, "hard_tanh", "t_min" comma "t_max");
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(LeakyRelu, "leaky_relu", "alpha");
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(ThresholdedRelu,
"thresholded_relu",
"threshold");
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(SoftShrink, "soft_shrink", "lambda");
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Mish, "mish", "threshold"); DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Mish, "mish", "threshold");
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(TanhShrink, "tanh_shrink", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Softsign, "softsign", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Log, "log", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Celu, "celu", "alpha"); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(HardSwish, DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(HardSwish,
"hard_swish", "hard_swish",
"threshold" comma "scale" comma "threshold" comma "scale" comma
...@@ -62,106 +51,8 @@ DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(STanh, ...@@ -62,106 +51,8 @@ DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(STanh,
"stanh", "stanh",
"scale_a" comma "scale_b"); // NOLINT "scale_a" comma "scale_b"); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Softplus,
"softplus",
"beta" comma "threshold"); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Relu, "relu", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Sigmoid, "sigmoid", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Sqrt, "sqrt", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Rsqrt, "rsqrt", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Relu6, "relu6", "threshold"); // NOLINT DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Relu6, "relu6", "threshold"); // NOLINT
KernelSignature SqrtActiOpArgumentMapping(const ArgumentMappingContext& ctx) {
if (ctx.IsDenseTensorInput("X")) {
return KernelSignature("sqrt", {"X"}, {}, {"Out"});
} else {
return KernelSignature("sqrt_sr", {"X"}, {}, {"Out"});
}
}
KernelSignature SquareActiOpArgumentMapping(const ArgumentMappingContext& ctx) {
if (ctx.IsDenseTensorInput("X")) {
return KernelSignature("square", {"X"}, {}, {"Out"});
} else {
return KernelSignature("square_sr", {"X"}, {}, {"Out"});
}
}
KernelSignature ReluDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature("relu_double_grad", {"Out", "DDX"}, {}, {"DDOut"});
}
KernelSignature SigmoidDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature(
"sigmoid_double_grad", {"Out", "DOut", "DDX"}, {}, {"DOutNew", "DDOut"});
}
KernelSignature SigmoidTripleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature("sigmoid_triple_grad",
{"Out", "DOut", "DDX", "D_DOut_New", "D_DDOut"},
{},
{"D_OutNew", "D_DOut", "D_DDx"});
}
KernelSignature LeakyReluDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature(
"leaky_relu_double_grad", {"X", "DDX"}, {"alpha"}, {"DDOut"});
}
KernelSignature LeakyReluOpArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature("leaky_relu", {"X"}, {"alpha"}, {"Out"});
}
KernelSignature EluOpArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature("elu", {"X"}, {"alpha"}, {"Out"});
}
KernelSignature EluGradOpArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature(
"elu_grad", {"X", "Out", "Out@GRAD"}, {"alpha"}, {"X@GRAD"});
}
KernelSignature EluDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature(
"elu_double_grad", {"X", "DOut", "DDX"}, {"alpha"}, {"DX", "DDOut"});
}
KernelSignature LogDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature(
"log_double_grad", {"X", "DOut", "DDX"}, {}, {"DX", "DDOut"});
}
KernelSignature SqrtDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature(
"sqrt_double_grad", {"Out", "DX", "DDX"}, {}, {"DOut", "DDOut"});
}
KernelSignature RsqrtDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature(
"rsqrt_double_grad", {"Out", "DX", "DDX"}, {}, {"DOut", "DDOut"});
}
KernelSignature CeluDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature(
"celu_double_grad", {"X", "DOut", "DDX"}, {"alpha"}, {"DX", "DDOut"});
}
KernelSignature SquareDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature(
"square_double_grad", {"X", "DOut", "DDX"}, {}, {"DX", "DDOut"});
}
KernelSignature PowOpArgumentMapping(const ArgumentMappingContext& ctx) {
if (ctx.HasInput("FactorTensor")) {
return KernelSignature("pow", {"X"}, {"FactorTensor"}, {"Out"});
...@@ -182,70 +73,17 @@ KernelSignature PowGradOpArgumentMapping(const ArgumentMappingContext& ctx) {
}  // namespace phi
PD_REGISTER_BASE_KERNEL_NAME(relu_grad_grad, relu_double_grad);
PD_REGISTER_BASE_KERNEL_NAME(leaky_relu_grad_grad, leaky_relu_double_grad);
PD_REGISTER_BASE_KERNEL_NAME(softshrink, soft_shrink);
PD_REGISTER_BASE_KERNEL_NAME(softshrink_grad, soft_shrink_grad);
PD_REGISTER_BASE_KERNEL_NAME(elu_grad_grad, elu_double_grad);
PD_REGISTER_BASE_KERNEL_NAME(sigmoid_grad_grad, sigmoid_double_grad);
PD_REGISTER_BASE_KERNEL_NAME(log_grad_grad, log_double_grad);
PD_REGISTER_BASE_KERNEL_NAME(sqrt_grad_grad, sqrt_double_grad);
PD_REGISTER_BASE_KERNEL_NAME(rsqrt_grad_grad, rsqrt_double_grad);
PD_REGISTER_BASE_KERNEL_NAME(celu_grad_grad, celu_double_grad);
PD_REGISTER_BASE_KERNEL_NAME(square_grad_grad, square_double_grad);
PD_REGISTER_BASE_KERNEL_NAME(brelu, hard_tanh);
PD_REGISTER_BASE_KERNEL_NAME(brelu_grad, hard_tanh_grad);
PD_REGISTER_ARG_MAPPING_FN(relu_grad, phi::ReluGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(square_grad, phi::SquareGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(sqrt_grad, phi::SqrtGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(sqrt_grad_grad,
phi::SqrtDoubleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(rsqrt_grad, phi::RsqrtGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(rsqrt_grad_grad,
phi::RsqrtDoubleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(mish_grad, phi::MishGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(stanh_grad, phi::STanhGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(softplus_grad, phi::SoftplusGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(relu_grad_grad,
phi::ReluDoubleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(brelu_grad, phi::HardTanhGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(leaky_relu, phi::LeakyReluOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(leaky_relu_grad,
phi::LeakyReluGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(leaky_relu_grad_grad,
phi::LeakyReluDoubleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(thresholded_relu_grad,
phi::ThresholdedReluGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(relu6_grad, phi::Relu6GradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(softshrink_grad,
phi::SoftShrinkGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(tanh_shrink_grad,
phi::TanhShrinkGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(elu, phi::EluOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(elu_grad, phi::EluGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(elu_grad_grad, phi::EluDoubleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(softsign_grad, phi::SoftsignGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(sigmoid_grad, phi::SigmoidGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(sigmoid_grad_grad,
phi::SigmoidDoubleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(sigmoid_triple_grad,
phi::SigmoidTripleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(log_grad, phi::LogGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(log_grad_grad, phi::LogDoubleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(sqrt, phi::SqrtActiOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(square, phi::SquareActiOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(hard_swish_grad,
phi::HardSwishGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(swish_grad, phi::SwishGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(pow_grad, phi::PowGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(pow, phi::PowOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(celu_grad, phi::CeluGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(celu_grad_grad,
phi::CeluDoubleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(square_grad_grad,
phi::SquareDoubleGradOpArgumentMapping);
...@@ -29,7 +29,6 @@ __deprecated_func_name__ = {
}
__activations_noattr__ = [
'sigmoid',
'silu',
'logsigmoid',
'tanh_shrink',
...@@ -38,12 +37,7 @@ __activations_noattr__ = [
'tanh',
]
__unary_func__ = [
'sqrt',
'rsqrt',
'abs',
'square',
]
__unary_func__ = ['abs']
__inplace_unary_func__ = [
'exp_',
...@@ -85,23 +79,6 @@ for _OP in set(__inplace_unary_func__):
_func = generate_inplace_fn(_OP)
globals()[_OP] = _func
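The trailing-underscore names registered by the loop above are the in-place variants of the corresponding unary ops: they overwrite the input tensor instead of allocating a new output. A minimal sketch, assuming a Paddle build where `Tensor.exp_` is exposed (it is one of the entries in `__inplace_unary_func__`):

.. code-block:: python

    import paddle

    x = paddle.to_tensor([0.0, 1.0, 2.0])
    y = paddle.exp(x)   # out-of-place: x is left unchanged
    x.exp_()            # in-place: x now holds exp of its original values
    print(paddle.allclose(x, y))  # expected: Tensor(True)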
add_sample_code(
globals()["sigmoid"],
r"""
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3])
out = F.sigmoid(x)
print(out)
# [0.40131234 0.450166 0.52497919 0.57444252]
""",
)
add_sample_code(
globals()["silu"],
r"""
...@@ -163,38 +140,6 @@ Examples:
""",
)
add_sample_code(
globals()["sqrt"],
r"""
Examples:
.. code-block:: python
import paddle
x = paddle.to_tensor([0.1, 0.2, 0.3, 0.4])
out = paddle.sqrt(x)
print(out)
# [0.31622777 0.4472136 0.54772256 0.63245553]
""",
)
add_sample_code(
globals()["rsqrt"],
r"""
Examples:
.. code-block:: python
import paddle
x = paddle.to_tensor([0.1, 0.2, 0.3, 0.4])
out = paddle.rsqrt(x)
print(out)
# [3.16227766 2.23606798 1.82574186 1.58113883]
""",
)
add_sample_code(
globals()["abs"],
r"""
...@@ -211,22 +156,6 @@ Examples:
""",
)
add_sample_code(
globals()["square"],
r"""
Examples:
.. code-block:: python
import paddle
x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3])
out = paddle.square(x)
print(out)
# [0.16 0.04 0.01 0.09]
""",
)
add_sample_code(
globals()["softplus"],
r"""
...@@ -812,6 +741,85 @@ def round(x, name=None):
return out
def rsqrt(x, name=None):
"""
Rsqrt Activation Operator.
Please make sure the input is positive; zero or negative values produce Inf or NaN.
.. math::
out = \\frac{1}{\\sqrt{x}}
Args:
x (Tensor): Input of Rsqrt operator, an N-D Tensor, with data type float32, float64 or float16.
name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
Returns:
Tensor. Output of Rsqrt operator, a Tensor with the same shape as the input.
Examples:
.. code-block:: python
import paddle
x = paddle.to_tensor([0.1, 0.2, 0.3, 0.4])
out = paddle.rsqrt(x)
print(out)
# [3.16227766 2.23606798 1.82574186 1.58113883]
"""
if in_dygraph_mode():
return _C_ops.rsqrt(x)
if _in_legacy_dygraph():
return _legacy_C_ops.rsqrt(x)
check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'rsqrt')
helper = LayerHelper('rsqrt', **locals())
out = helper.create_variable_for_type_inference(dtype=x.dtype)
helper.append_op(type='rsqrt', inputs={"X": x}, outputs={"Out": out})
return out
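As a quick sanity check of the formula in the docstring, rsqrt should agree elementwise with 1 / sqrt(x); a small illustrative sketch (the values are arbitrary):

.. code-block:: python

    import paddle

    x = paddle.to_tensor([0.1, 0.2, 0.3, 0.4])
    # out = 1 / sqrt(x), so the two expressions should match up to float error
    print(paddle.allclose(paddle.rsqrt(x), 1.0 / paddle.sqrt(x)))  # expected: Tensor(True)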
def sigmoid(x, name=None):
"""
Sigmoid Activation.
.. math::
out = \\frac{1}{1 + e^{-x}}
Args:
x (Tensor): Input of Sigmoid operator, an N-D Tensor, with data type float32, float64 or float16.
name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
Returns:
Tensor. Output of Sigmoid operator, a Tensor with the same shape as the input.
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3])
out = F.sigmoid(x)
print(out)
# [0.40131234 0.450166 0.52497919 0.57444252]
"""
if in_dygraph_mode():
return _C_ops.sigmoid(x)
if _in_legacy_dygraph():
return _legacy_C_ops.sigmoid(x)
check_variable_and_dtype(
x, 'x', ['float16', 'float32', 'float64'], 'sigmoid'
)
helper = LayerHelper('sigmoid', **locals())
out = helper.create_variable_for_type_inference(dtype=x.dtype)
helper.append_op(type='sigmoid', inputs={"X": x}, outputs={"Out": out})
return out
def sin(x, name=None):
"""
Sine Activation Operator.
...@@ -886,6 +894,91 @@ def sinh(x, name=None):
return out
def sqrt(x, name=None):
"""
Sqrt Activation Operator.
.. math::
out=\\sqrt{x}=x^{1/2}
Args:
x (Tensor): Input of Sqrt operator, an N-D Tensor, with data type float32, float64 or float16.
name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
Returns:
Tensor. Output of Sqrt operator, a Tensor with the same shape as the input.
Examples:
.. code-block:: python
import paddle
x = paddle.to_tensor([0.1, 0.2, 0.3, 0.4])
out = paddle.sqrt(x)
print(out)
# [0.31622777 0.4472136 0.54772256 0.63245553]
"""
if in_dygraph_mode():
return _C_ops.sqrt(x)
if _in_legacy_dygraph():
return _legacy_C_ops.sqrt(x)
check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'sqrt')
helper = LayerHelper('sqrt', **locals())
out = helper.create_variable_for_type_inference(dtype=x.dtype)
helper.append_op(type='sqrt', inputs={"X": x}, outputs={"Out": out})
return out
def square(x, name=None):
"""
Square each element of the input.
.. math::
out = x^2
Args:
x (Tensor): Input of Square operator, an N-D Tensor, with data type float32, float64 or float16.
name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
Returns:
Tensor. Output of Square operator, a Tensor with the same shape as the input.
Examples:
.. code-block:: python
import paddle
x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3])
out = paddle.square(x)
print(out)
# [0.16 0.04 0.01 0.09]
"""
if in_dygraph_mode():
return _C_ops.square(x)
if _in_legacy_dygraph():
return _legacy_C_ops.square(x)
check_variable_and_dtype(
x,
'x',
[
'int32',
'int64',
'float16',
'float32',
'float64',
'complex64',
'complex128',
],
'square',
)
helper = LayerHelper('square', **locals())
out = helper.create_variable_for_type_inference(dtype=x.dtype)
helper.append_op(type='square', inputs={"X": x}, outputs={"Out": out})
return out
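Besides the dygraph branches shown above, these APIs also build static-graph programs through `LayerHelper.append_op`, which is the path exercised by the newly generated op definitions. A hedged sketch of static-graph usage (the `paddle.static` entry points are the standard ones; exact outputs depend on floating-point rounding):

.. code-block:: python

    import numpy as np
    import paddle

    paddle.enable_static()
    main_prog = paddle.static.Program()
    with paddle.static.program_guard(main_prog):
        x = paddle.static.data(name='x', shape=[4], dtype='float32')
        out = paddle.square(paddle.sqrt(x))  # appends sqrt and square ops to main_prog

    exe = paddle.static.Executor(paddle.CPUPlace())
    (res,) = exe.run(main_prog,
                     feed={'x': np.array([0.1, 0.2, 0.3, 0.4], dtype='float32')},
                     fetch_list=[out])
    print(res)  # approximately [0.1, 0.2, 0.3, 0.4] again, up to float error
    paddle.disable_static()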
def tan(x, name=None):
"""
Tangent Operator. Computes tangent of x element-wise.
......