add fill_constant_batch_size_like_op to Static RNN's h_boot (#5332)

20667e1e · Yang Yang(Tony) · GitHub · 70154597 · 20667e1e · 20667e1e
4 changed files
--- a/paddle/operators/fill_constant_batch_size_like_op.cc
+++ b/paddle/operators/fill_constant_batch_size_like_op.cc
@@ -34,15 +34,18 @@ class FillConstantBatchSizeLikeOp : public framework::OperatorWithKernel {
    std::vector<int64_t> shape_int64(shape.size(), 0);
    std::transform(shape.begin(), shape.end(), shape_int64.begin(),
                   [](int a) { return static_cast<int64_t>(a); });
-    auto dims = framework::make_ddim(shape_int64);
+    auto output_dim = framework::make_ddim(shape_int64);

-    int dim_idx = ctx->Attrs().Get<int>("dim_idx");
-    PADDLE_ENFORCE_GE(dim_idx, 0);
-    PADDLE_ENFORCE_GT(static_cast<int>(shape.size()), dim_idx);
-    PADDLE_ENFORCE_GT(ctx->GetInputDim("Input").size(), dim_idx);
+    int input_dim_idx = ctx->Attrs().Get<int>("input_dim_idx");
+    PADDLE_ENFORCE_GE(input_dim_idx, 0);
+    PADDLE_ENFORCE_GT(ctx->GetInputDim("Input").size(), input_dim_idx);

-    dims[dim_idx] = ctx->GetInputDim("Input")[dim_idx];
-    ctx->SetOutputDim("Out", dims);
+    int output_dim_idx = ctx->Attrs().Get<int>("output_dim_idx");
+    PADDLE_ENFORCE_GE(output_dim_idx, 0);
+    PADDLE_ENFORCE_GT(static_cast<int>(shape.size()), output_dim_idx);
+
+    output_dim[output_dim_idx] = ctx->GetInputDim("Input")[input_dim_idx];
+    ctx->SetOutputDim("Out", output_dim);
  }

 protected:
@@ -69,8 +72,11 @@ class FillConstantBatchSizeLikeOpMaker
              "(Tensor) Tensor of specified shape will be filled "
              "with the specified value");
    AddAttr<std::vector<int>>("shape", "(vector<int>) The shape of the output");
-    AddAttr<int>("dim_idx",
-                 "(int, default 0) The index of batch size dimension")
+    AddAttr<int>("input_dim_idx",
+                 "(int, default 0) the index of input's batch size dimension")
+        .SetDefault(0);
+    AddAttr<int>("output_dim_idx",
+                 "(int, default 0) the index of output's batch size dimension")
        .SetDefault(0);
    AddAttr<float>("value", "(float, default 0) The value to be filled")
        .SetDefault(0.0f);
@@ -86,8 +92,9 @@ Fill up a variable with specified constant value.
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP_WITHOUT_GRADIENT(fill_constant_batch_size_like,
+REGISTER_OPERATOR(fill_constant_batch_size_like,
                  ops::FillConstantBatchSizeLikeOp,
+                  paddle::framework::EmptyGradOpMaker,
                  ops::FillConstantBatchSizeLikeOpMaker);
 REGISTER_OP_CPU_KERNEL(
    fill_constant_batch_size_like,

--- a/python/paddle/v2/framework/layers.py
+++ b/python/paddle/v2/framework/layers.py
@@ -581,25 +581,45 @@ class StaticRNN(object):
        if self.status != StaticRNN.IN_RNN_BLOCK:
            raise ValueError("You must invoke {0} in rnn block".format(method))

-    def memory(self, init=None, shape=None, dtype=None, init_value=0):
+    def memory(self,
+               init=None,
+               shape=None,
+               batch_ref=None,
+               init_value=0.0,
+               init_batch_dim_idx=0,
+               ref_batch_dim_idx=1):
+        '''
+        :param init: boot memory; if not set, both shape and batch_ref must be provided
+        :param shape: shape of the boot memory
+        :param batch_ref: batch size reference variable
+        :param init_value: the init value of boot memory
+        :param init_batch_dim_idx: the index of batch size in init's dimension
+        :param ref_batch_dim_idx: the index of batch size in batch_ref's dimension
+        :return: boot memory
+        '''
        self._assert_in_rnn_block_('memory')
        if init is None:
-            if shape is None or dtype is None:
+            if shape is None or batch_ref is None:
                raise ValueError(
-                    "if init is None, memory at least need shape and dtype")
+                    "if init is None, memory at least need shape and batch_ref")
            parent_block = self.parent_block()
            var_name = unique_name("@".join([self.helper.name, "memory_boot"]))
            boot_var = parent_block.create_var(
-                name=var_name, shape=shape, dtype=dtype, persistable=False)
+                name=var_name,
+                shape=shape,
+                dtype=batch_ref.data_type,
+                persistable=False)

            parent_block.append_op(
-                type="fill_constant",
-                inputs={},
+                type="fill_constant_batch_size_like",
+                inputs={'Input': [batch_ref]},
                outputs={'Out': [boot_var]},
                attrs={
                    'value': init_value,
-                    'shape': [40] + list(boot_var.shape[1:]),
-                    'data_type': boot_var.data_type
+                    'shape': boot_var.shape,
+                    'data_type': boot_var.data_type,
+                    'input_dim_idx': ref_batch_dim_idx,
+                    'output_dim_idx': init_batch_dim_idx
                })

            return self.memory(init=boot_var)

--- a/python/paddle/v2/framework/tests/test_fill_constant_batch_size_like_op.py
+++ b/python/paddle/v2/framework/tests/test_fill_constant_batch_size_like_op.py
@@ -21,9 +21,14 @@ class TestFillConstantBatchSizeLikeWhenSecondDimIsBatchSize(OpTest):
    def setUp(self):
        self.op_type = "fill_constant_batch_size_like"
        self.inputs = {'Input': np.random.random((219, 232)).astype("float32")}
-        self.attrs = {'value': 3.5, 'shape': [132, -1, 7], 'dim_idx': 1}
-
-        out = np.random.random((132, 232, 7)).astype("float32")
+        self.attrs = {
+            'value': 3.5,
+            'shape': [132, -1, 7],
+            'input_dim_idx': 0,
+            'output_dim_idx': 1
+        }
+
+        out = np.random.random((132, 219, 7)).astype("float32")
        out.fill(3.5)
        self.outputs = {'Out': out}


--- a/python/paddle/v2/framework/tests/test_recurrent_op.py
+++ b/python/paddle/v2/framework/tests/test_recurrent_op.py
 import unittest

-import logging
-
-from op_test import get_numeric_gradient
-from paddle.v2.framework.layers import *
+import paddle.v2.framework.layers as layers
 from paddle.v2.framework.framework import Program
 from paddle.v2.framework.executor import Executor
 from paddle.v2.framework.backward import append_backward_ops
@@ -16,8 +13,8 @@ class PyRNNBase(object):
        self.x = np.ones(shape=input_shape).astype("float32")
        self.y = np.zeros(shape=output_shape).astype("float32")

-    def step(self):
-        pass
+    def step(self, step_id, x):
+        raise NotImplementedError

    def forward(self):
        for step_id in range(self.x.shape[0]):
@@ -116,30 +113,30 @@ class RecurrentOpTest1(unittest.TestCase):
        self.output_shape = (self.sent_len, self.batch_size, self.input_dim)
        self.py_rnn = PySimpleRNN1(self.input_shape, self.output_shape)

-        self.output = mean(x=self.create_rnn_op(), **self.p_info)
+        self.output = layers.mean(x=self.create_rnn_op(), **self.p_info)

    def create_rnn_op(self):
-        x = data(
+        x = layers.data(
            shape=[self.sent_len, self.batch_size, self.input_dim],
            data_type='float32',
            name='x',
            append_batch_size=False,
            **self.p_info)
        x.stop_gradient = False
-        h_boot = data(
+        h_boot = layers.data(
            shape=[self.input_dim],
            data_type='float32',
            name='h_boot',
            **self.p_info)
        h_boot.stop_gradient = False

-        rnn = StaticRNN(main_program=self.main_program)
+        rnn = layers.StaticRNN(main_program=self.main_program)
        with rnn.step():
            h_pre = rnn.memory(init=h_boot)
            x_t = rnn.step_input(x)

-            h = scale(
-                x=elementwise_add(
+            h = layers.scale(
+                x=layers.elementwise_add(
                    x=h_pre, y=x_t, **self.p_info),
                scale=self.py_rnn.scale,
                **self.p_info)
@@ -249,41 +246,41 @@ class RecurrentOpTest2(RecurrentOpTest1):
        self.output_shape = (self.sent_len, self.batch_size, self.input_dim)
        self.py_rnn = PySimpleRNN2(self.input_shape, self.output_shape)

-        self.output = mean(x=self.create_rnn_op(), **self.p_info)
+        self.output = layers.mean(x=self.create_rnn_op(), **self.p_info)

    def create_rnn_op(self):
-        x = data(
+        x = layers.data(
            shape=[self.sent_len, self.batch_size, self.input_dim],
            data_type='float32',
            name='x',
            append_batch_size=False,
            **self.p_info)
        x.stop_gradient = False
-        h_boot = data(
+        h_boot = layers.data(
            shape=[self.input_dim],
            data_type='float32',
            name='h_boot',
            **self.p_info)
        h_boot.stop_gradient = False

-        rnn = StaticRNN(main_program=self.main_program)
+        rnn = layers.StaticRNN(main_program=self.main_program)
        with rnn.step():
            h_pre = rnn.memory(init=h_boot)
            x_t = rnn.step_input(x)

-            temp_l = fc(input=x_t,
+            temp_l = layers.fc(input=x_t,
                               size=self.input_dim,
                               param_attr={'name': 'W'},
                               bias_attr=False,
                               **self.p_info)
-            temp_r = fc(input=h_pre,
+            temp_r = layers.fc(input=h_pre,
                               size=self.input_dim,
                               param_attr={'name': 'U'},
                               bias_attr=False,
                               **self.p_info)

-            h = sigmoid(
-                x=elementwise_add(
+            h = layers.sigmoid(
+                x=layers.elementwise_add(
                    x=temp_l, y=temp_r, **self.p_info),
                **self.p_info)

@@ -293,7 +290,7 @@ class RecurrentOpTest2(RecurrentOpTest1):
        return rnn()


-class RecurrentOpTest3(RecurrentOpTest1):
+class RecurrentOpMultipleMemoryTest(RecurrentOpTest1):
    '''
    Test RNNOp with two memories
    equation:
@@ -310,8 +307,8 @@ class RecurrentOpTest3(RecurrentOpTest1):

    class PySimpleRNN3(PyRNNBase):
        def __init__(self, input_shape, output_shape):
-            super(RecurrentOpTest3.PySimpleRNN3, self).__init__(input_shape,
-                                                                output_shape)
+            super(RecurrentOpMultipleMemoryTest.PySimpleRNN3, self).__init__(
+                input_shape, output_shape)

            seq_len, batch_size, input_dim = input_shape
            self.h_boot1 = np.random.normal(size=(batch_size,
@@ -345,27 +342,27 @@ class RecurrentOpTest3(RecurrentOpTest1):

        self.input_shape = (self.sent_len, self.batch_size, self.input_dim)
        self.output_shape = (self.sent_len, self.batch_size, self.input_dim)
-        self.py_rnn = RecurrentOpTest3.PySimpleRNN3(self.input_shape,
-                                                    self.output_shape)
+        self.py_rnn = RecurrentOpMultipleMemoryTest.PySimpleRNN3(
+            self.input_shape, self.output_shape)

-        self.output = mean(x=self.create_rnn_op(), **self.p_info)
+        self.output = layers.mean(x=self.create_rnn_op(), **self.p_info)

    def create_rnn_op(self):
-        x = data(
+        x = layers.data(
            shape=[self.sent_len, self.batch_size, self.input_dim],
            data_type='float32',
            name='x',
            append_batch_size=False,
            **self.p_info)
        x.stop_gradient = False
-        h_boot1 = data(
+        h_boot1 = layers.data(
            shape=[self.batch_size, self.input_dim],
            data_type='float32',
            name='h_boot1',
            append_batch_size=False,
            **self.p_info)
        h_boot1.stop_gradient = False
-        h_boot2 = data(
+        h_boot2 = layers.data(
            shape=[self.batch_size, self.input_dim],
            data_type='float32',
            name='h_boot2',
@@ -373,15 +370,15 @@ class RecurrentOpTest3(RecurrentOpTest1):
            **self.p_info)
        h_boot2.stop_gradient = False

-        rnn = StaticRNN(main_program=self.main_program)
+        rnn = layers.StaticRNN(main_program=self.main_program)
        with rnn.step():
            h_pre1 = rnn.memory(init=h_boot1)
            h_pre2 = rnn.memory(init=h_boot2)
            x_t = rnn.step_input(x)

-            mem1 = scale(x=h_pre1, scale=1.0, **self.p_info)
-            mem2 = scale(x=h_pre2, scale=1.0, **self.p_info)
-            out = sums(input=[mem1, x_t, mem2], **self.p_info)
+            mem1 = layers.scale(x=h_pre1, scale=1.0, **self.p_info)
+            mem2 = layers.scale(x=h_pre2, scale=1.0, **self.p_info)
+            out = layers.sums(input=[mem1, x_t, mem2], **self.p_info)

            rnn.update_memory(h_pre1, mem1)
            rnn.update_memory(h_pre2, mem2)
@@ -390,5 +387,70 @@ class RecurrentOpTest3(RecurrentOpTest1):
        return rnn()


+class RecurrentOpNoMemBootTest(RecurrentOpTest1):
+    '''
+    Test RNNOp with a single memory that has no boot variable (created from shape and batch_ref)
+    equation:
+        mem = x + mem_pre
+        y = mem
+    vars:
+        - x
+    memories:
+        - mem
+    outputs:
+       - y
+    '''
+
+    class PySimpleRNN4(PyRNNBase):
+        def __init__(self, input_shape, output_shape):
+            super(RecurrentOpNoMemBootTest.PySimpleRNN4, self).__init__(
+                input_shape, output_shape)
+            men_dim = input_shape
+            self.mems = np.zeros(shape=men_dim).astype("float32")
+
+        def step(self, step_id, x):
+            if step_id == 0:
+                pre_mem = np.zeros_like(x)
+            else:
+                pre_mem = self.mems[step_id - 1]
+            self.mems[step_id] = pre_mem + x
+            self.y[step_id] = self.mems[step_id]
+
+    input_dim = 1
+    batch_size = 1
+    sent_len = 2
+
+    def setUp(self):
+        self.setup_program()
+
+        self.data_field = {"x"}
+
+        self.input_shape = (self.sent_len, self.batch_size, self.input_dim)
+        self.output_shape = (self.sent_len, self.batch_size, self.input_dim)
+        self.py_rnn = RecurrentOpNoMemBootTest.PySimpleRNN4(self.input_shape,
+                                                            self.output_shape)
+        self.output = layers.mean(x=self.create_rnn_op(), **self.p_info)
+        print self.main_program
+
+    def create_rnn_op(self):
+        x = layers.data(
+            shape=[self.sent_len, self.batch_size, self.input_dim],
+            data_type='float32',
+            name='x',
+            append_batch_size=False,
+            **self.p_info)
+        x.stop_gradient = False
+
+        rnn = layers.StaticRNN(main_program=self.main_program)
+        with rnn.step():
+            mem_pre = rnn.memory(shape=[-1, self.input_dim], batch_ref=x)
+            x_t = rnn.step_input(x)
+            mem = layers.elementwise_add(x=mem_pre, y=x_t, **self.p_info)
+            rnn.update_memory(mem_pre, mem)
+            rnn.output(mem)
+
+        return rnn()
+
+
 if __name__ == '__main__':
    unittest.main()