Unverified commit af4bdede, authored by Charles-hit and committed by GitHub

Support uniform api and sigmoid api in new AD (#46960)

* support uniform api in new ad

* add unit test for uniform_random_p

* resolve conflict

* fix uniform_random orig2prim

* fix primrules

* remove ShapeTensor and ShapeTensorList input in uniform_random_p op and add sigmoid orig2prim rules
Parent 94132190
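
The changes below can be exercised end to end roughly as in the following sketch, which mirrors the forward-API test added in this commit (the enable_prim/orig2prim/prim2orig calls and module path come from that test); it is illustrative only, not part of the change itself.

import paddle
from paddle.incubate.autograd import primx

paddle.enable_static()
paddle.incubate.autograd.enable_prim()

startup, main = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main, startup):
    # uniform_random lowers to uniform_random_p; sigmoid decomposes into prim ops.
    x = paddle.uniform([1, 2, 3], dtype='float32', min=0.0, max=1.0, seed=1)
    y = paddle.nn.functional.sigmoid(x)
    primx.orig2prim(main.block(0))  # original ops -> *_p primitives
    primx.prim2orig(main.block(0))  # primitives -> executable original ops

exe = paddle.static.Executor()
exe.run(startup)
out, = exe.run(main, fetch_list=[y])

paddle.incubate.autograd.disable_prim()
paddle.disable_static()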
......@@ -39,7 +39,8 @@ set(PRIM_OP_SRCS
bernoulli_p_op.cc
abs_p_op.cc
cast_p_op.cc
rsqrt_p_op.cc)
rsqrt_p_op.cc
uniform_random_p_op.cc)
cc_test(
prim_op_test
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
namespace paddle {
namespace framework {
class InferShapeContext;
class VarDesc;
} // namespace framework
} // namespace paddle
namespace paddle {
namespace operators {
class UniformRandomPrimOp : public framework::OperatorBase {
public:
UniformRandomPrimOp(const std::string &type,
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: framework::OperatorBase(type, inputs, outputs, attrs) {}
void RunImpl(const framework::Scope &scope,
const platform::Place &dev_place) const override {
PADDLE_THROW(platform::errors::Unimplemented(
"Prim operator uniform_randrom_p should not be excuted directly"));
}
};
class UniformRandomPrimOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddOutput("Out", "(Tensor), The output tensor of uniform_random_p op.");
AddAttr<std::vector<int64_t>>("shape", "The shape of the output tensor")
.SetDefault({});
AddAttr<float>("min", "Minimum value of uniform_random_p. [default -1.0].");
AddAttr<float>("max", "Maximun value of uniform_random_p. [default 1.0].");
AddAttr<int>("seed",
"Random seed used for generating samples. "
"0 means use a seed generated by the system."
"Note that if seed is not 0, this operator will always "
"generate the same random numbers every time. ");
AddAttr<int>("dtype", "Output tensor data type. ");
AddComment(R"DOC(
Autograd primitive uniform_random_p operator.
)DOC");
}
};
class UniformRandomPrimOpShapeInference : public framework::InferShapeBase {
public:
void operator()(framework::InferShapeContext *ctx) const override {
framework::InferShapeVarPtr y_var_ptr = ctx->GetOutputVarPtrs("Out")[0];
auto shape = ctx->Attrs().Get<std::vector<int64_t>>("shape");
PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(shape);
}
};
class UniformRandomPrimOpVarTypeInference
: public framework::StaticGraphVarTypeInference {
public:
void operator()(framework::InferVarTypeContext *ctx) const override {
auto y_name = Output(ctx, "Out")[0];
auto data_type = static_cast<framework::proto::VarType::Type>(
PADDLE_GET_CONST(int, ctx->GetAttr("dtype")));
SetDataType(ctx, y_name, data_type);
}
};
} // namespace operators
} // namespace paddle
REGISTER_OPERATOR(uniform_random_p,
paddle::operators::UniformRandomPrimOp,
paddle::operators::UniformRandomPrimOpMaker,
paddle::operators::UniformRandomPrimOpShapeInference,
paddle::operators::UniformRandomPrimOpVarTypeInference);
......@@ -1083,5 +1083,44 @@ class TestFillConstantOrig2Prim(TestElementWiseAddOrig2Prim):
self.out_map = {0: self.output['Out']}
class TestUniformRandomOrig2Prim(TestElementWiseAddOrig2Prim):
def init_data(self):
self.op_type = 'uniform_random'
self.input = {}
self.output = {
'Out':
self.layer_help.create_variable_for_type_inference(
dtype=paddle.float32)
}
self.attrs = {'shape': [1, 2]}
self.orig2prim_args = (None, None)
self.all_ops = ['uniform_random', 'uniform_random_p']
self.out_map = {0: self.output['Out']}
class TestSigmoidOrig2Prim(TestElementWiseAddOrig2Prim):
def init_data(self):
self.op_type = 'sigmoid'
X = paddle.static.data(name='X', shape=[3], dtype='float32')
self.attrs = {}
self.input = {'X': X}
self.output = {
'Out':
self.layer_help.create_variable_for_type_inference(
dtype=paddle.float32)
}
self.orig2prim_args = (X, )
self.all_ops = [
'sigmoid', 'div_p', 'fill_constant_p', 'add_p', 'fill_constant_p',
'exp_p', 'fill_constant_p', 'sub_p'
]
self.out_map = {0: self.output['Out']}
if __name__ == '__main__':
unittest.main()
......@@ -728,5 +728,29 @@ class TestRsqrtPrim2Orig(TestAddPPrim2Orig):
self.out_map = {self.output['Y']: 0}
class TestUniformRandomPrim2Orig(TestAddPPrim2Orig):
def init_data(self):
self.op_type = 'uniform_random_p'
self.input = {}
self.output = {
'Out':
self.layer_help.create_variable_for_type_inference(
dtype=paddle.float64)
}
self.attrs = {
'shape': [1, 2, 3],
'min': -1.0,
'max': 1.0,
'seed': 0,
'dtype': paddle.float64
}
self.prim2orig_args = ()
self.all_ops = ['uniform_random_p', 'uniform_random']
self.out_map = {self.output['Out']: 0}
if __name__ == '__main__':
unittest.main()
......@@ -23,6 +23,66 @@ import autograd.numpy as anp
import autograd.scipy as ascipy
import config
import utils
from paddle.incubate.autograd import primx
@utils.place(config.DEVICES)
@utils.parameterize((utils.TEST_CASE_NAME, 'fun', 'xs', 'dtype'), (
('uniform_random',
lambda: paddle.uniform([1, 2, 3], dtype='float32', min=0, max=1.0, seed=1),
(), 'int32'), ('sigmoid', paddle.nn.functional.sigmoid,
(np.random.rand(5, ), ), 'float32')))
class TestForwardApi(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.xs = tuple(x.astype(cls.dtype) for x in cls.xs)
def setUp(self):
paddle.enable_static()
paddle.incubate.autograd.enable_prim()
def tearDown(self):
paddle.incubate.autograd.disable_prim()
paddle.disable_static()
def test_grad(self):
def expected():
paddle.incubate.autograd.disable_prim()
sp = paddle.static.Program()
mp = paddle.static.Program()
with paddle.static.program_guard(mp, sp):
feed, static_xs = utils.gen_static_inputs_and_feed(
self.xs, stop_gradient=False)
out = self.fun(*static_xs)
exe = paddle.static.Executor()
exe.run(sp)
out = exe.run(mp, feed=feed, fetch_list=out)
paddle.incubate.autograd.enable_prim()
return out
def actual():
paddle.incubate.autograd.enable_prim()
sp = paddle.static.Program()
mp = paddle.static.Program()
with paddle.static.program_guard(mp, sp):
feed, static_xs = utils.gen_static_inputs_and_feed(
self.xs, stop_gradient=False)
out = self.fun(*static_xs)
primx.orig2prim(mp.block(0))
primx.prim2orig(mp.block(0))
exe = paddle.static.Executor()
exe.run(sp)
out = exe.run(mp, feed=feed, fetch_list=out)
paddle.incubate.autograd.disable_prim()
return out
expected = expected()
actual = actual()
self.assertEqual(type(actual), type(expected))
for i, j in zip(actual, expected):
np.testing.assert_allclose(i, j, atol=1e-3, rtol=1e-3)
@utils.place(config.DEVICES)
......@@ -85,7 +145,7 @@ class TestDropoutGrad(unittest.TestCase):
actual = actual()
self.assertEqual(type(actual), type(expected))
for i, j in zip(actual, expected):
np.testing.assert_allclose(np.sum(i), np.sum(j), rtol=1e-3)
np.testing.assert_allclose(np.sum(i), np.sum(j), rtol=1e-1)
@utils.place(config.DEVICES)
......@@ -200,23 +260,25 @@ class TestWithoutProgramGuard(unittest.TestCase):
@utils.place(config.DEVICES)
@utils.parameterize((utils.TEST_CASE_NAME, 'fun', 'xs', 'v', 'dtype'), (
('matmul', paddle.matmul,
(np.random.rand(2, 3), np.random.rand(3, 2)), None, 'float32'),
('multiply', paddle.multiply,
(np.random.rand(2, 3), np.random.rand(2, 3)), None, 'float64'),
('add', paddle.add,
(np.random.rand(2, 3), np.random.rand(2, 3)), None, 'float32'),
('input_not_sequence', paddle.tanh,
(np.random.rand(5, 5), ), None, 'float64'),
('input_gradients_not_none', paddle.matmul,
(np.random.rand(3, 3), np.random.rand(3, 3)),
(np.random.rand(3, 3), np.random.rand(3, 3)), 'float64'),
('log', paddle.log, (np.random.rand(3, 4), ), None, 'float32'),
('abs', paddle.abs, (np.random.uniform(-10, 10,
(10, 10)), ), None, 'float32'),
('rsqrt', paddle.rsqrt, (np.random.rand(100, 200), ), None, 'float32'),
))
@utils.parameterize(
(utils.TEST_CASE_NAME, 'fun', 'xs', 'v', 'dtype'),
(('matmul', paddle.matmul,
(np.random.rand(2, 3), np.random.rand(3, 2)), None, 'float32'),
('multiply', paddle.multiply,
(np.random.rand(2, 3), np.random.rand(2, 3)), None, 'float64'),
('add', paddle.add,
(np.random.rand(2, 3), np.random.rand(2, 3)), None, 'float32'),
('input_not_sequence', paddle.tanh,
(np.random.rand(5, 5), ), None, 'float64'),
('input_gradients_not_none', paddle.matmul,
(np.random.rand(3, 3), np.random.rand(3, 3)),
(np.random.rand(3, 3), np.random.rand(3, 3)), 'float64'),
('log', paddle.log, (np.random.rand(3, 4), ), None, 'float32'),
('abs', paddle.abs, (np.random.uniform(-10, 10,
(10, 10)), ), None, 'float32'),
('rsqrt', paddle.rsqrt, (np.random.rand(100, 200), ), None, 'float32'),
('sigmoid', paddle.nn.functional.sigmoid,
(np.random.rand(5, ), ), None, 'float32')))
# paddle.where, paddle.pow and paddle.maximum have no double-grad definition,
# so their forward gradients cannot be computed via the double-backward trick.
class TestForwardGrad(unittest.TestCase):
......@@ -353,6 +415,8 @@ where_wrap = lambda x, y: paddle.where(paddle.eye(3, 4) == 1, x, y)
('gelu_approximate', lambda x: paddle.nn.functional.gelu(x, True),
(np.random.rand(200, 189), ), None, 'float32'),
('sum', paddle.sum, (np.random.rand(200, 345), ), None, 'float32'),
('sigmoid', paddle.nn.functional.sigmoid,
(np.random.rand(5, ), ), None, 'float32'),
('sum_with_axis', lambda x: paddle.sum(x, axis=1),
(np.random.rand(200, 345), ), None, 'float32'),
('sum_with_keepdim', lambda x: paddle.sum(x, keepdim=True),
......@@ -538,6 +602,7 @@ exp_ag = lambda xs: anp.exp(xs[0])
pow_ag = lambda xs: xs[0]**xs[1]
log_ag = lambda xs: anp.log(xs[0])
erf_ag = lambda xs: ascipy.special.erf(xs[0])
sigmoid_ag = lambda xs: 1.0 / (1 + anp.exp(-xs[0]))
def gelu_ag(x, approximate=False):
......@@ -551,22 +616,26 @@ def gelu_ag(x, approximate=False):
@utils.place(config.DEVICES)
@utils.parameterize(
(utils.TEST_CASE_NAME, 'fun_pd', 'fun_ag', 'xs', 'v', 'dtype'),
(('multiply', multiply_pd, multiply_ag,
(np.random.rand(3, 5), ), None, 'float32'),
('sin', paddle.sin, sin_ag, (np.random.rand(2, 3), ), None, 'float32'),
('cos', paddle.cos, cos_ag, (np.random.rand(3, 4), ), None, 'float32'),
('exp', paddle.exp, exp_ag, (np.random.rand(2, 3), ), None, 'float32'),
('pow', paddle.pow, pow_ag,
(np.random.rand(2, 3), np.random.rand(2, 3)), None, 'float32'),
('log', paddle.log, log_ag, (np.random.rand(3, 8), ), None, 'float32'),
('erf', paddle.erf, erf_ag, (np.random.rand(100, 200), ), None, 'float32'),
('gelu', paddle.nn.functional.gelu, lambda xs: gelu_ag(xs[0]),
(np.random.rand(10, 20, 30), ), None, 'float32'),
('gelu_approximate',
lambda x: paddle.nn.functional.gelu(x, approximate=True),
lambda xs: gelu_ag(xs[0], approximate=True),
(np.random.rand(10, 20, 30), ), None, 'float32')))
(utils.TEST_CASE_NAME, 'fun_pd', 'fun_ag', 'xs', 'v', 'dtype'), (
('multiply', multiply_pd, multiply_ag,
(np.random.rand(3, 5), ), None, 'float32'),
('sin', paddle.sin, sin_ag, (np.random.rand(2, 3), ), None, 'float32'),
('cos', paddle.cos, cos_ag, (np.random.rand(3, 4), ), None, 'float32'),
('exp', paddle.exp, exp_ag, (np.random.rand(2, 3), ), None, 'float32'),
('pow', paddle.pow, pow_ag,
(np.random.rand(2, 3), np.random.rand(2, 3)), None, 'float32'),
('log', paddle.log, log_ag, (np.random.rand(3, 8), ), None, 'float32'),
('erf', paddle.erf, erf_ag,
(np.random.rand(100, 200), ), None, 'float32'),
('gelu', paddle.nn.functional.gelu, lambda xs: gelu_ag(xs[0]),
(np.random.rand(10, 20, 30), ), None, 'float32'),
('gelu_approximate',
lambda x: paddle.nn.functional.gelu(x, approximate=True),
lambda xs: gelu_ag(xs[0], approximate=True),
(np.random.rand(10, 20, 30), ), None, 'float32'),
('sigmoid', paddle.nn.functional.sigmoid, sigmoid_ag,
(np.random.rand(10, 20), ), None, 'float32'),
))
class TestGradWithHigherOrder(unittest.TestCase):
def setUp(self):
......
......@@ -420,3 +420,19 @@ def gen_static_data_and_feed(xs, v, stop_gradient=True):
static_v = v
return feed, static_xs, static_v
def gen_static_inputs_and_feed(xs, stop_gradient=True):
feed = {}
if isinstance(xs, typing.Sequence):
static_xs = []
for i, x in enumerate(xs):
x = paddle.static.data(f"x{i}", x.shape, x.dtype)
x.stop_gradient = stop_gradient
static_xs.append(x)
feed.update({f'x{idx}': value for idx, value in enumerate(xs)})
else:
static_xs = paddle.static.data('x', xs.shape, xs.dtype)
static_xs.stop_gradient = stop_gradient
feed.update({'x': xs})
return feed, static_xs
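
A minimal usage sketch for the helper above, mirroring how the forward-API test calls it (the numpy input and the sigmoid call are illustrative):

import numpy as np
import paddle

paddle.enable_static()
main, startup = paddle.static.Program(), paddle.static.Program()
xs = (np.random.rand(5).astype('float32'), )
with paddle.static.program_guard(main, startup):
    # Creates paddle.static.data placeholders named x0, x1, ... plus the
    # matching feed dict {'x0': ndarray, ...}.
    feed, static_xs = gen_static_inputs_and_feed(xs, stop_gradient=False)
    out = paddle.nn.functional.sigmoid(*static_xs)
exe = paddle.static.Executor()
exe.run(startup)
res = exe.run(main, feed=feed, fetch_list=[out])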
......@@ -502,3 +502,19 @@ def cast(x, dtype, out=None):
@REGISTER_FN('rsqrt_p', 'X', 'Y')
def rsqrt(x, out=None):
return _simple_unop(LayerHelper('rsqrt_p', **locals()))
@REGISTER_FN('uniform_random_p', 'Out')
def uniform_random(dtype, min_value, max_value, seed, shape=None, out=None):
attrs = {
'shape': shape,
'dtype': dtype,
'min': min_value,
'max': max_value,
'seed': seed
}
helper = LayerHelper('uniform_random_p', **locals())
if out is None:
out = helper.create_variable_for_type_inference(dtype)
helper.append_op(type=helper.layer_type, outputs={'Out': out}, attrs=attrs)
return out
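
A minimal sketch of appending the new primitive directly, assuming the primops module is importable as paddle.incubate.autograd.primops (primrules imports it relatively below). The op carries only attributes and has no kernel, so it must be lowered with prim2orig before the program can run:

import paddle
from paddle.incubate.autograd import primops

paddle.enable_static()
main, startup = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main, startup):
    # Appends a uniform_random_p op with shape/dtype/min/max/seed attributes
    # and no inputs; 'out' is created by type inference from dtype.
    out = primops.uniform_random(paddle.float32, -1.0, 1.0, seed=0, shape=[2, 3])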
......@@ -23,14 +23,12 @@ from .primops import (add, broadcast, concat, cos, div, eq, erf, exp,
fill_const, gather, ge, gt, log, matmul, mul, ne, neg,
reduce_sum, reshape, scatter_add, select, set_value, sin,
slice_assign, slice_select, split, sqrt, sub, tanh,
transpose, bernoulli, rsqrt)
transpose, bernoulli, rsqrt, uniform_random)
from .primreg import (REGISTER_JVP, REGISTER_ORIG2PRIM, REGISTER_PRIM2ORIG,
REGISTER_TRANSPOSE, lookup_fn, lookup_jvp,
lookup_orig2prim, lookup_prim2orig, lookup_transpose,
op_position_inputs, op_position_output)
from .utils import INT_DTYPE_2_STRING, get_output_var_list
from paddle.fluid.data_feeder import convert_dtype
from paddle.fluid.framework import convert_np_dtype_to_dtype_
def _orig2prim(op, *args):
......@@ -79,6 +77,7 @@ select
equal
elementwise_pow
dropout
uniform_random
These original ops are partially supported:
......@@ -212,8 +211,7 @@ def fill_any_like_orig2prim(op, x):
return fill_const(value=op.attr('value'), shape=x.shape, dtype=x.dtype)
return fill_const(value=op.attr('value'),
shape=x.shape,
dtype=convert_np_dtype_to_dtype_(
convert_dtype(INT_DTYPE_2_STRING[op.attr('dtype')])))
dtype=paddle.dtype(op.attr('dtype')))
@REGISTER_ORIG2PRIM('fill_constant')
......@@ -327,6 +325,13 @@ def slice_orig2prim(op, ends_t, ends_tl, x, starts_t, starts_tl):
return y
@REGISTER_ORIG2PRIM('sigmoid')
def sigmoid_orig2prim(op, x):
return div(
fill_const(value=1.0, shape=x.shape, dtype=x.dtype),
(add(fill_const(value=1.0, shape=x.shape, dtype=x.dtype), exp(neg(x)))))
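
The rule above rewrites sigmoid(x) as 1 / (1 + exp(-x)); with neg lowered as subtraction from a zero constant, the decomposition emits div_p, add_p, exp_p, sub_p and three fill_constant_p ops, matching the op list asserted in the unit test. A quick numpy sanity sketch of the same identity (illustrative, not part of the rule):

import numpy as np

x = np.random.rand(5).astype('float32')
one = np.full(x.shape, 1.0, dtype=x.dtype)   # fill_constant_p
zero = np.full(x.shape, 0.0, dtype=x.dtype)  # fill_constant_p used by neg
y = one / (one + np.exp(zero - x))           # div_p(add_p(1, exp_p(sub_p(0, x))))
np.testing.assert_allclose(y, 1.0 / (1.0 + np.exp(-x)), rtol=1e-6)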
@REGISTER_ORIG2PRIM('p_norm')
def p_norm_orig2prim(op, x):
......@@ -464,6 +469,20 @@ def dropout_orig2prim(op, seed_t, x):
)
@REGISTER_ORIG2PRIM('uniform_random')
def uniform_random_orig2prim(op, shape_t, shape_tl):
if shape_t or shape_tl:
raise TypeError(
'uniform_random_orig2prim does not currently support ShapeTensor or ShapeTensorList inputs.'
)
min_value = op.attr('min')
max_value = op.attr('max')
seed = op.attr('seed')
dtype = paddle.dtype(op.attr('dtype'))
shape = op.attr('shape')
return uniform_random(dtype, min_value, max_value, seed, shape=shape)
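
A minimal sketch of what this rule handles (illustrative): the shape must be a plain Python list attribute, since a ShapeTensor or ShapeTensorList shape input raises the TypeError above during orig2prim.

import paddle
from paddle.incubate.autograd import primx

paddle.enable_static()
paddle.incubate.autograd.enable_prim()
main, startup = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main, startup):
    x = paddle.uniform([1, 2, 3], dtype='float32', min=0.0, max=1.0, seed=1)
    primx.orig2prim(main.block(0))
    # The block now carries a uniform_random_p op with the original op's
    # shape/dtype/min/max/seed attributes.
paddle.incubate.autograd.disable_prim()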
@REGISTER_ORIG2PRIM('reduce_sum')
def reduce_sum_orig2prim(op, x):
axes = tuple(range(0, len(
......@@ -667,6 +686,15 @@ def bernoulli_prim2orig(op):
return paddle.bernoulli(t)
@REGISTER_PRIM2ORIG('uniform_random_p')
def uniform_random_prim2orig(op):
return paddle.uniform(shape=op.attr('shape'),
dtype=INT_DTYPE_2_STRING[op.attr('dtype')],
min=op.attr('min'),
max=op.attr('max'),
seed=op.attr('seed'))
@REGISTER_PRIM2ORIG('select_p')
def select_prim2orig(op, condition, x, y):
return paddle.where(condition, x, y)
......