Unverified commit af4bdede, authored by Charles-hit and committed by GitHub

Support uniform api and sigmoid api in new AD (#46960)

* support uniform api in new ad

* add unit test for uniform_random_p

* resolve conflict

* fix uniform_random orig2prim

* fix primrules

* remove ShapeTensor and ShapeTensorList input in uniform_random_p op and add sigmoid orig2prim rules
Parent 94132190
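
The changes below can be exercised end to end roughly as in the following sketch, which mirrors the forward-API test added in this commit (the enable_prim/orig2prim/prim2orig calls and module path come from that test); it is illustrative only, not part of the change itself.

import paddle
from paddle.incubate.autograd import primx

paddle.enable_static()
paddle.incubate.autograd.enable_prim()

startup, main = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main, startup):
    # uniform_random lowers to uniform_random_p; sigmoid decomposes into prim ops.
    x = paddle.uniform([1, 2, 3], dtype='float32', min=0.0, max=1.0, seed=1)
    y = paddle.nn.functional.sigmoid(x)
    primx.orig2prim(main.block(0))  # original ops -> *_p primitives
    primx.prim2orig(main.block(0))  # primitives -> executable original ops

exe = paddle.static.Executor()
exe.run(startup)
out, = exe.run(main, fetch_list=[y])

paddle.incubate.autograd.disable_prim()
paddle.disable_static()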
......@@ -39,7 +39,8 @@ set(PRIM_OP_SRCS
bernoulli_p_op.cc
abs_p_op.cc
cast_p_op.cc
rsqrt_p_op.cc)
rsqrt_p_op.cc
uniform_random_p_op.cc)
cc_test(
prim_op_test
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
namespace paddle {
namespace framework {
class InferShapeContext;
class VarDesc;
} // namespace framework
} // namespace paddle
namespace paddle {
namespace operators {
class UniformRandomPrimOp : public framework::OperatorBase {
public:
UniformRandomPrimOp(const std::string &type,
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: framework::OperatorBase(type, inputs, outputs, attrs) {}
void RunImpl(const framework::Scope &scope,
const platform::Place &dev_place) const override {
PADDLE_THROW(platform::errors::Unimplemented(
"Prim operator uniform_randrom_p should not be excuted directly"));
}
};
class UniformRandomPrimOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddOutput("Out", "(Tensor), The output tensor of uniform_random_p op.");
AddAttr<std::vector<int64_t>>("shape", "The shape of the output tensor")
.SetDefault({});
AddAttr<float>("min", "Minimum value of uniform_random_p. [default -1.0].");
AddAttr<float>("max", "Maximun value of uniform_random_p. [default 1.0].");
AddAttr<int>("seed",
"Random seed used for generating samples. "
"0 means use a seed generated by the system."
"Note that if seed is not 0, this operator will always "
"generate the same random numbers every time. ");
AddAttr<int>("dtype", "Output tensor data type. ");
AddComment(R"DOC(
Autograd primitive uniform_random_p operator.
)DOC");
}
};
class UniformRandomPrimOpShapeInference : public framework::InferShapeBase {
public:
void operator()(framework::InferShapeContext *ctx) const override {
framework::InferShapeVarPtr y_var_ptr = ctx->GetOutputVarPtrs("Out")[0];
auto shape = ctx->Attrs().Get<std::vector<int64_t>>("shape");
PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(shape);
}
};
class UniformRandomPrimOpVarTypeInference
: public framework::StaticGraphVarTypeInference {
public:
void operator()(framework::InferVarTypeContext *ctx) const override {
auto y_name = Output(ctx, "Out")[0];
auto data_type = static_cast<framework::proto::VarType::Type>(
PADDLE_GET_CONST(int, ctx->GetAttr("dtype")));
SetDataType(ctx, y_name, data_type);
}
};
} // namespace operators
} // namespace paddle
REGISTER_OPERATOR(uniform_random_p,
paddle::operators::UniformRandomPrimOp,
paddle::operators::UniformRandomPrimOpMaker,
paddle::operators::UniformRandomPrimOpShapeInference,
paddle::operators::UniformRandomPrimOpVarTypeInference);
......@@ -1083,5 +1083,44 @@ class TestFillConstantOrig2Prim(TestElementWiseAddOrig2Prim):
self.out_map = {0: self.output['Out']}
class TestUniformRandomOrig2Prim(TestElementWiseAddOrig2Prim):
def init_data(self):
self.op_type = 'uniform_random'
self.input = {}
self.output = {
'Out':
self.layer_help.create_variable_for_type_inference(
dtype=paddle.float32)
}
self.attrs = {'shape': [1, 2]}
self.orig2prim_args = (None, None)
self.all_ops = ['uniform_random', 'uniform_random_p']
self.out_map = {0: self.output['Out']}
class TestSigmoidOrig2Prim(TestElementWiseAddOrig2Prim):
def init_data(self):
self.op_type = 'sigmoid'
X = paddle.static.data(name='X', shape=[3], dtype='float32')
self.attrs = {}
self.input = {'X': X}
self.output = {
'Out':
self.layer_help.create_variable_for_type_inference(
dtype=paddle.float32)
}
self.orig2prim_args = (X, )
self.all_ops = [
'sigmoid', 'div_p', 'fill_constant_p', 'add_p', 'fill_constant_p',
'exp_p', 'fill_constant_p', 'sub_p'
]
self.out_map = {0: self.output['Out']}
if __name__ == '__main__':
unittest.main()
......@@ -728,5 +728,29 @@ class TestRsqrtPrim2Orig(TestAddPPrim2Orig):
self.out_map = {self.output['Y']: 0}
class TestUniformRandomPrim2Orig(TestAddPPrim2Orig):
def init_data(self):
self.op_type = 'uniform_random_p'
self.input = {}
self.output = {
'Out':
self.layer_help.create_variable_for_type_inference(
dtype=paddle.float64)
}
self.attrs = {
'shape': [1, 2, 3],
'min': -1.0,
'max': 1.0,
'seed': 0,
'dtype': paddle.float64
}
self.prim2orig_args = ()
self.all_ops = ['uniform_random_p', 'uniform_random']
self.out_map = {self.output['Out']: 0}
if __name__ == '__main__':
unittest.main()
......@@ -23,6 +23,66 @@ import autograd.numpy as anp
import autograd.scipy as ascipy
import config
import utils
from paddle.incubate.autograd import primx
@utils.place(config.DEVICES)
@utils.parameterize((utils.TEST_CASE_NAME, 'fun', 'xs', 'dtype'), (
('uniform_random',
lambda: paddle.uniform([1, 2, 3], dtype='float32', min=0, max=1.0, seed=1),
(), 'int32'), ('sigmoid', paddle.nn.functional.sigmoid,
(np.random.rand(5, ), ), 'float32')))
class TestForwardApi(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.xs = tuple(x.astype(cls.dtype) for x in cls.xs)
def setUp(self):
paddle.enable_static()
paddle.incubate.autograd.enable_prim()
def tearDown(self):
paddle.incubate.autograd.disable_prim()
paddle.disable_static()
def test_grad(self):
def expected():
paddle.incubate.autograd.disable_prim()
sp = paddle.static.Program()
mp = paddle.static.Program()
with paddle.static.program_guard(mp, sp):
feed, static_xs = utils.gen_static_inputs_and_feed(
self.xs, stop_gradient=False)
out = self.fun(*static_xs)
exe = paddle.static.Executor()
exe.run(sp)
out = exe.run(mp, feed=feed, fetch_list=out)
paddle.incubate.autograd.enable_prim()
return out
def actual():
paddle.incubate.autograd.enable_prim()
sp = paddle.static.Program()
mp = paddle.static.Program()
with paddle.static.program_guard(mp, sp):
feed, static_xs = utils.gen_static_inputs_and_feed(
self.xs, stop_gradient=False)
out = self.fun(*static_xs)
primx.orig2prim(mp.block(0))
primx.prim2orig(mp.block(0))
exe = paddle.static.Executor()
exe.run(sp)
out = exe.run(mp, feed=feed, fetch_list=out)
paddle.incubate.autograd.disable_prim()
return out
expected = expected()
actual = actual()
self.assertEqual(type(actual), type(expected))
for i, j in zip(actual, expected):
np.testing.assert_allclose(i, j, atol=1e-3, rtol=1e-3)
@utils.place(config.DEVICES)
......@@ -85,7 +145,7 @@ class TestDropoutGrad(unittest.TestCase):
actual = actual()
self.assertEqual(type(actual), type(expected))
for i, j in zip(actual, expected):
np.testing.assert_allclose(np.sum(i), np.sum(j), rtol=1e-3)
np.testing.assert_allclose(np.sum(i), np.sum(j), rtol=1e-1)
@utils.place(config.DEVICES)
......@@ -200,23 +260,25 @@ class TestWithoutProgramGuard(unittest.TestCase):
@utils.place(config.DEVICES)
@utils.parameterize((utils.TEST_CASE_NAME, 'fun', 'xs', 'v', 'dtype'), (
('matmul', paddle.matmul,
(np.random.rand(2, 3), np.random.rand(3, 2)), None, 'float32'),
('multiply', paddle.multiply,
(np.random.rand(2, 3), np.random.rand(2, 3)), None, 'float64'),
('add', paddle.add,
(np.random.rand(2, 3), np.random.rand(2, 3)), None, 'float32'),
('input_not_sequence', paddle.tanh,
(np.random.rand(5, 5), ), None, 'float64'),
('input_gradients_not_none', paddle.matmul,
(np.random.rand(3, 3), np.random.rand(3, 3)),
(np.random.rand(3, 3), np.random.rand(3, 3)), 'float64'),
('log', paddle.log, (np.random.rand(3, 4), ), None, 'float32'),
('abs', paddle.abs, (np.random.uniform(-10, 10,
(10, 10)), ), None, 'float32'),
('rsqrt', paddle.rsqrt, (np.random.rand(100, 200), ), None, 'float32'),
))
@utils.parameterize(
(utils.TEST_CASE_NAME, 'fun', 'xs', 'v', 'dtype'),
(('matmul', paddle.matmul,
(np.random.rand(2, 3), np.random.rand(3, 2)), None, 'float32'),
('multiply', paddle.multiply,
(np.random.rand(2, 3), np.random.rand(2, 3)), None, 'float64'),
('add', paddle.add,
(np.random.rand(2, 3), np.random.rand(2, 3)), None, 'float32'),
('input_not_sequence', paddle.tanh,
(np.random.rand(5, 5), ), None, 'float64'),
('input_gradients_not_none', paddle.matmul,
(np.random.rand(3, 3), np.random.rand(3, 3)),
(np.random.rand(3, 3), np.random.rand(3, 3)), 'float64'),
('log', paddle.log, (np.random.rand(3, 4), ), None, 'float32'),
('abs', paddle.abs, (np.random.uniform(-10, 10,
(10, 10)), ), None, 'float32'),
('rsqrt', paddle.rsqrt, (np.random.rand(100, 200), ), None, 'float32'),
('sigmoid', paddle.nn.functional.sigmoid,
(np.random.rand(5, ), ), None, 'float32')))
# paddle.where, paddle.pow and paddle.maximum have no double-grad definition,
# so their forward gradients cannot be computed via the double-backward trick.
class TestForwardGrad(unittest.TestCase):
......@@ -353,6 +415,8 @@ where_wrap = lambda x, y: paddle.where(paddle.eye(3, 4) == 1, x, y)
('gelu_approximate', lambda x: paddle.nn.functional.gelu(x, True),
(np.random.rand(200, 189), ), None, 'float32'),
('sum', paddle.sum, (np.random.rand(200, 345), ), None, 'float32'),
('sigmoid', paddle.nn.functional.sigmoid,
(np.random.rand(5, ), ), None, 'float32'),
('sum_with_axis', lambda x: paddle.sum(x, axis=1),
(np.random.rand(200, 345), ), None, 'float32'),
('sum_with_keepdim', lambda x: paddle.sum(x, keepdim=True),
......@@ -538,6 +602,7 @@ exp_ag = lambda xs: anp.exp(xs[0])
pow_ag = lambda xs: xs[0]**xs[1]
log_ag = lambda xs: anp.log(xs[0])
erf_ag = lambda xs: ascipy.special.erf(xs[0])
sigmoid_ag = lambda xs: 1.0 / (1 + anp.exp(-xs[0]))
def gelu_ag(x, approximate=False):
......@@ -551,22 +616,26 @@ def gelu_ag(x, approximate=False):
@utils.place(config.DEVICES)
@utils.parameterize(
(utils.TEST_CASE_NAME, 'fun_pd', 'fun_ag', 'xs', 'v', 'dtype'),
(('multiply', multiply_pd, multiply_ag,
(np.random.rand(3, 5), ), None, 'float32'),
('sin', paddle.sin, sin_ag, (np.random.rand(2, 3), ), None, 'float32'),
('cos', paddle.cos, cos_ag, (np.random.rand(3, 4), ), None, 'float32'),
('exp', paddle.exp, exp_ag, (np.random.rand(2, 3), ), None, 'float32'),
('pow', paddle.pow, pow_ag,
(np.random.rand(2, 3), np.random.rand(2, 3)), None, 'float32'),
('log', paddle.log, log_ag, (np.random.rand(3, 8), ), None, 'float32'),
('erf', paddle.erf, erf_ag, (np.random.rand(100, 200), ), None, 'float32'),
('gelu', paddle.nn.functional.gelu, lambda xs: gelu_ag(xs[0]),
(np.random.rand(10, 20, 30), ), None, 'float32'),
('gelu_approximate',
lambda x: paddle.nn.functional.gelu(x, approximate=True),
lambda xs: gelu_ag(xs[0], approximate=True),
(np.random.rand(10, 20, 30), ), None, 'float32')))
(utils.TEST_CASE_NAME, 'fun_pd', 'fun_ag', 'xs', 'v', 'dtype'), (
('multiply', multiply_pd, multiply_ag,
(np.random.rand(3, 5), ), None, 'float32'),
('sin', paddle.sin, sin_ag, (np.random.rand(2, 3), ), None, 'float32'),
('cos', paddle.cos, cos_ag, (np.random.rand(3, 4), ), None, 'float32'),
('exp', paddle.exp, exp_ag, (np.random.rand(2, 3), ), None, 'float32'),
('pow', paddle.pow, pow_ag,
(np.random.rand(2, 3), np.random.rand(2, 3)), None, 'float32'),
('log', paddle.log, log_ag, (np.random.rand(3, 8), ), None, 'float32'),
('erf', paddle.erf, erf_ag,
(np.random.rand(100, 200), ), None, 'float32'),
('gelu', paddle.nn.functional.gelu, lambda xs: gelu_ag(xs[0]),
(np.random.rand(10, 20, 30), ), None, 'float32'),
('gelu_approximate',
lambda x: paddle.nn.functional.gelu(x, approximate=True),
lambda xs: gelu_ag(xs[0], approximate=True),
(np.random.rand(10, 20, 30), ), None, 'float32'),
('sigmoid', paddle.nn.functional.sigmoid, sigmoid_ag,
(np.random.rand(10, 20), ), None, 'float32'),
))
class TestGradWithHigherOrder(unittest.TestCase):
def setUp(self):
......
......@@ -420,3 +420,19 @@ def gen_static_data_and_feed(xs, v, stop_gradient=True):
static_v = v
return feed, static_xs, static_v
def gen_static_inputs_and_feed(xs, stop_gradient=True):
feed = {}
if isinstance(xs, typing.Sequence):
static_xs = []
for i, x in enumerate(xs):
x = paddle.static.data(f"x{i}", x.shape, x.dtype)
x.stop_gradient = stop_gradient
static_xs.append(x)
feed.update({f'x{idx}': value for idx, value in enumerate(xs)})
else:
static_xs = paddle.static.data('x', xs.shape, xs.dtype)
static_xs.stop_gradient = stop_gradient
feed.update({'x': xs})
return feed, static_xs
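
A minimal usage sketch for the helper above, mirroring how the forward-API test calls it (the numpy input and the sigmoid call are illustrative):

import numpy as np
import paddle

paddle.enable_static()
main, startup = paddle.static.Program(), paddle.static.Program()
xs = (np.random.rand(5).astype('float32'), )
with paddle.static.program_guard(main, startup):
    # Creates paddle.static.data placeholders named x0, x1, ... plus the
    # matching feed dict {'x0': ndarray, ...}.
    feed, static_xs = gen_static_inputs_and_feed(xs, stop_gradient=False)
    out = paddle.nn.functional.sigmoid(*static_xs)
exe = paddle.static.Executor()
exe.run(startup)
res = exe.run(main, feed=feed, fetch_list=[out])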
......@@ -502,3 +502,19 @@ def cast(x, dtype, out=None):
@REGISTER_FN('rsqrt_p', 'X', 'Y')
def rsqrt(x, out=None):
return _simple_unop(LayerHelper('rsqrt_p', **locals()))
@REGISTER_FN('uniform_random_p', 'Out')
def uniform_random(dtype, min_value, max_value, seed, shape=None, out=None):
attrs = {
'shape': shape,
'dtype': dtype,
'min': min_value,
'max': max_value,
'seed': seed
}
helper = LayerHelper('uniform_random_p', **locals())
if out is None:
out = helper.create_variable_for_type_inference(dtype)
helper.append_op(type=helper.layer_type, outputs={'Out': out}, attrs=attrs)
return out
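
A minimal sketch of appending the new primitive directly, assuming the primops module is importable as paddle.incubate.autograd.primops (primrules imports it relatively below). The op carries only attributes and has no kernel, so it must be lowered with prim2orig before the program can run:

import paddle
from paddle.incubate.autograd import primops

paddle.enable_static()
main, startup = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main, startup):
    # Appends a uniform_random_p op with shape/dtype/min/max/seed attributes
    # and no inputs; 'out' is created by type inference from dtype.
    out = primops.uniform_random(paddle.float32, -1.0, 1.0, seed=0, shape=[2, 3])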
......@@ -23,14 +23,12 @@ from .primops import (add, broadcast, concat, cos, div, eq, erf, exp,
fill_const, gather, ge, gt, log, matmul, mul, ne, neg,
reduce_sum, reshape, scatter_add, select, set_value, sin,
slice_assign, slice_select, split, sqrt, sub, tanh,
transpose, bernoulli, rsqrt)
transpose, bernoulli, rsqrt, uniform_random)
from .primreg import (REGISTER_JVP, REGISTER_ORIG2PRIM, REGISTER_PRIM2ORIG,
REGISTER_TRANSPOSE, lookup_fn, lookup_jvp,
lookup_orig2prim, lookup_prim2orig, lookup_transpose,
op_position_inputs, op_position_output)
from .utils import INT_DTYPE_2_STRING, get_output_var_list
from paddle.fluid.data_feeder import convert_dtype
from paddle.fluid.framework import convert_np_dtype_to_dtype_
def _orig2prim(op, *args):
......@@ -79,6 +77,7 @@ select
equal
elementwise_pow
dropout
uniform_random
These original ops are partially supported:
......@@ -212,8 +211,7 @@ def fill_any_like_orig2prim(op, x):
return fill_const(value=op.attr('value'), shape=x.shape, dtype=x.dtype)
return fill_const(value=op.attr('value'),
shape=x.shape,
dtype=convert_np_dtype_to_dtype_(
convert_dtype(INT_DTYPE_2_STRING[op.attr('dtype')])))
dtype=paddle.dtype(op.attr('dtype')))
@REGISTER_ORIG2PRIM('fill_constant')
......@@ -327,6 +325,13 @@ def slice_orig2prim(op, ends_t, ends_tl, x, starts_t, starts_tl):
return y
@REGISTER_ORIG2PRIM('sigmoid')
def sigmoid_orig2prim(op, x):
return div(
fill_const(value=1.0, shape=x.shape, dtype=x.dtype),
(add(fill_const(value=1.0, shape=x.shape, dtype=x.dtype), exp(neg(x)))))
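
The rule above rewrites sigmoid(x) as 1 / (1 + exp(-x)); with neg lowered as subtraction from a zero constant, the decomposition emits div_p, add_p, exp_p, sub_p and three fill_constant_p ops, matching the op list asserted in the unit test. A quick numpy sanity sketch of the same identity (illustrative, not part of the rule):

import numpy as np

x = np.random.rand(5).astype('float32')
one = np.full(x.shape, 1.0, dtype=x.dtype)   # fill_constant_p
zero = np.full(x.shape, 0.0, dtype=x.dtype)  # fill_constant_p used by neg
y = one / (one + np.exp(zero - x))           # div_p(add_p(1, exp_p(sub_p(0, x))))
np.testing.assert_allclose(y, 1.0 / (1.0 + np.exp(-x)), rtol=1e-6)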
@REGISTER_ORIG2PRIM('p_norm')
def p_norm_orig2prim(op, x):
......@@ -464,6 +469,20 @@ def dropout_orig2prim(op, seed_t, x):
)
@REGISTER_ORIG2PRIM('uniform_random')
def uniform_random_orig2prim(op, shape_t, shape_tl):
if shape_t or shape_tl:
raise TypeError(
'uniform_random_orig2prim does not currently support ShapeTensor or ShapeTensorList inputs.'
)
min_value = op.attr('min')
max_value = op.attr('max')
seed = op.attr('seed')
dtype = paddle.dtype(op.attr('dtype'))
shape = op.attr('shape')
return uniform_random(dtype, min_value, max_value, seed, shape=shape)
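
A minimal sketch of what this rule handles (illustrative): the shape must be a plain Python list attribute, since a ShapeTensor or ShapeTensorList shape input raises the TypeError above during orig2prim.

import paddle
from paddle.incubate.autograd import primx

paddle.enable_static()
paddle.incubate.autograd.enable_prim()
main, startup = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main, startup):
    x = paddle.uniform([1, 2, 3], dtype='float32', min=0.0, max=1.0, seed=1)
    primx.orig2prim(main.block(0))
    # The block now carries a uniform_random_p op with the original op's
    # shape/dtype/min/max/seed attributes.
paddle.incubate.autograd.disable_prim()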
@REGISTER_ORIG2PRIM('reduce_sum')
def reduce_sum_orig2prim(op, x):
axes = tuple(range(0, len(
......@@ -667,6 +686,15 @@ def bernoulli_prim2orig(op):
return paddle.bernoulli(t)
@REGISTER_PRIM2ORIG('uniform_random_p')
def uniform_random_prim2orig(op):
return paddle.uniform(shape=op.attr('shape'),
dtype=INT_DTYPE_2_STRING[op.attr('dtype')],
min=op.attr('min'),
max=op.attr('max'),
seed=op.attr('seed'))
@REGISTER_PRIM2ORIG('select_p')
def select_prim2orig(op, condition, x, y):
return paddle.where(condition, x, y)
......