未验证 提交 20667e1e 编写于 作者: Y Yang Yang(Tony) 提交者: GitHub

add fill_constant_batch_size_like_op to Static RNN's h_boot (#5332)

上级 70154597
......@@ -34,15 +34,18 @@ class FillConstantBatchSizeLikeOp : public framework::OperatorWithKernel {
std::vector<int64_t> shape_int64(shape.size(), 0);
std::transform(shape.begin(), shape.end(), shape_int64.begin(),
[](int a) { return static_cast<int64_t>(a); });
auto dims = framework::make_ddim(shape_int64);
auto output_dim = framework::make_ddim(shape_int64);
int dim_idx = ctx->Attrs().Get<int>("dim_idx");
PADDLE_ENFORCE_GE(dim_idx, 0);
PADDLE_ENFORCE_GT(static_cast<int>(shape.size()), dim_idx);
PADDLE_ENFORCE_GT(ctx->GetInputDim("Input").size(), dim_idx);
int input_dim_idx = ctx->Attrs().Get<int>("input_dim_idx");
PADDLE_ENFORCE_GE(input_dim_idx, 0);
PADDLE_ENFORCE_GT(ctx->GetInputDim("Input").size(), input_dim_idx);
dims[dim_idx] = ctx->GetInputDim("Input")[dim_idx];
ctx->SetOutputDim("Out", dims);
int output_dim_idx = ctx->Attrs().Get<int>("output_dim_idx");
PADDLE_ENFORCE_GE(output_dim_idx, 0);
PADDLE_ENFORCE_GT(static_cast<int>(shape.size()), output_dim_idx);
output_dim[output_dim_idx] = ctx->GetInputDim("Input")[input_dim_idx];
ctx->SetOutputDim("Out", output_dim);
}
protected:
......@@ -69,8 +72,11 @@ class FillConstantBatchSizeLikeOpMaker
"(Tensor) Tensor of specified shape will be filled "
"with the specified value");
AddAttr<std::vector<int>>("shape", "(vector<int>) The shape of the output");
AddAttr<int>("dim_idx",
"(int, default 0) The index of batch size dimension")
AddAttr<int>("input_dim_idx",
"(int, default 0) the index of input's batch size dimension")
.SetDefault(0);
AddAttr<int>("output_dim_idx",
"(int, default 0) the index of output's batch size dimension")
.SetDefault(0);
AddAttr<float>("value", "(float, default 0) The value to be filled")
.SetDefault(0.0f);
......@@ -86,9 +92,10 @@ Fill up a variable with specified constant value.
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(fill_constant_batch_size_like,
ops::FillConstantBatchSizeLikeOp,
ops::FillConstantBatchSizeLikeOpMaker);
REGISTER_OPERATOR(fill_constant_batch_size_like,
ops::FillConstantBatchSizeLikeOp,
paddle::framework::EmptyGradOpMaker,
ops::FillConstantBatchSizeLikeOpMaker);
REGISTER_OP_CPU_KERNEL(
fill_constant_batch_size_like,
ops::FillConstantBatchSizeLikeOpKernel<paddle::platform::CPUPlace, float>,
......
......@@ -581,25 +581,45 @@ class StaticRNN(object):
if self.status != StaticRNN.IN_RNN_BLOCK:
raise ValueError("You must invoke {0} in rnn block".format(method))
def memory(self, init=None, shape=None, dtype=None, init_value=0):
def memory(self,
init=None,
shape=None,
batch_ref=None,
init_value=0.0,
init_batch_dim_idx=0,
ref_batch_dim_idx=1):
'''
:param init: boot memory, if not set, a shape, batch_ref must be provided
:param shape: shape of the boot memory
:param batch_ref: batch size reference variable
:param init_value: the init value of boot memory
:param init_batch_dim_idx: the index of batch size in init's dimension
:param ref_batch_dim_idx: the index of batch size in batch_ref's dimension
:return: boot memory
'''
self._assert_in_rnn_block_('memory')
if init is None:
if shape is None or dtype is None:
if shape is None or batch_ref is None:
raise ValueError(
"if init is None, memory at least need shape and dtype")
"if init is None, memory at least need shape and batch_ref")
parent_block = self.parent_block()
var_name = unique_name("@".join([self.helper.name, "memory_boot"]))
boot_var = parent_block.create_var(
name=var_name, shape=shape, dtype=dtype, persistable=False)
name=var_name,
shape=shape,
dtype=batch_ref.data_type,
persistable=False)
parent_block.append_op(
type="fill_constant",
inputs={},
type="fill_constant_batch_size_like",
inputs={'Input': [batch_ref]},
outputs={'Out': [boot_var]},
attrs={
'value': init_value,
'shape': [40] + list(boot_var.shape[1:]),
'data_type': boot_var.data_type
'shape': boot_var.shape,
'data_type': boot_var.data_type,
'input_dim_idx': ref_batch_dim_idx,
'output_dim_idx': init_batch_dim_idx
})
return self.memory(init=boot_var)
......
......@@ -21,9 +21,14 @@ class TestFillConstantBatchSizeLikeWhenSecondDimIsBatchSize(OpTest):
def setUp(self):
self.op_type = "fill_constant_batch_size_like"
self.inputs = {'Input': np.random.random((219, 232)).astype("float32")}
self.attrs = {'value': 3.5, 'shape': [132, -1, 7], 'dim_idx': 1}
out = np.random.random((132, 232, 7)).astype("float32")
self.attrs = {
'value': 3.5,
'shape': [132, -1, 7],
'input_dim_idx': 0,
'output_dim_idx': 1
}
out = np.random.random((132, 219, 7)).astype("float32")
out.fill(3.5)
self.outputs = {'Out': out}
......
import unittest
import logging
from op_test import get_numeric_gradient
from paddle.v2.framework.layers import *
import paddle.v2.framework.layers as layers
from paddle.v2.framework.framework import Program
from paddle.v2.framework.executor import Executor
from paddle.v2.framework.backward import append_backward_ops
......@@ -16,8 +13,8 @@ class PyRNNBase(object):
self.x = np.ones(shape=input_shape).astype("float32")
self.y = np.zeros(shape=output_shape).astype("float32")
def step(self):
pass
def step(self, step_id, x):
raise NotImplementedError
def forward(self):
for step_id in range(self.x.shape[0]):
......@@ -116,30 +113,30 @@ class RecurrentOpTest1(unittest.TestCase):
self.output_shape = (self.sent_len, self.batch_size, self.input_dim)
self.py_rnn = PySimpleRNN1(self.input_shape, self.output_shape)
self.output = mean(x=self.create_rnn_op(), **self.p_info)
self.output = layers.mean(x=self.create_rnn_op(), **self.p_info)
def create_rnn_op(self):
x = data(
x = layers.data(
shape=[self.sent_len, self.batch_size, self.input_dim],
data_type='float32',
name='x',
append_batch_size=False,
**self.p_info)
x.stop_gradient = False
h_boot = data(
h_boot = layers.data(
shape=[self.input_dim],
data_type='float32',
name='h_boot',
**self.p_info)
h_boot.stop_gradient = False
rnn = StaticRNN(main_program=self.main_program)
rnn = layers.StaticRNN(main_program=self.main_program)
with rnn.step():
h_pre = rnn.memory(init=h_boot)
x_t = rnn.step_input(x)
h = scale(
x=elementwise_add(
h = layers.scale(
x=layers.elementwise_add(
x=h_pre, y=x_t, **self.p_info),
scale=self.py_rnn.scale,
**self.p_info)
......@@ -249,41 +246,41 @@ class RecurrentOpTest2(RecurrentOpTest1):
self.output_shape = (self.sent_len, self.batch_size, self.input_dim)
self.py_rnn = PySimpleRNN2(self.input_shape, self.output_shape)
self.output = mean(x=self.create_rnn_op(), **self.p_info)
self.output = layers.mean(x=self.create_rnn_op(), **self.p_info)
def create_rnn_op(self):
x = data(
x = layers.data(
shape=[self.sent_len, self.batch_size, self.input_dim],
data_type='float32',
name='x',
append_batch_size=False,
**self.p_info)
x.stop_gradient = False
h_boot = data(
h_boot = layers.data(
shape=[self.input_dim],
data_type='float32',
name='h_boot',
**self.p_info)
h_boot.stop_gradient = False
rnn = StaticRNN(main_program=self.main_program)
rnn = layers.StaticRNN(main_program=self.main_program)
with rnn.step():
h_pre = rnn.memory(init=h_boot)
x_t = rnn.step_input(x)
temp_l = fc(input=x_t,
size=self.input_dim,
param_attr={'name': 'W'},
bias_attr=False,
**self.p_info)
temp_r = fc(input=h_pre,
size=self.input_dim,
param_attr={'name': 'U'},
bias_attr=False,
**self.p_info)
h = sigmoid(
x=elementwise_add(
temp_l = layers.fc(input=x_t,
size=self.input_dim,
param_attr={'name': 'W'},
bias_attr=False,
**self.p_info)
temp_r = layers.fc(input=h_pre,
size=self.input_dim,
param_attr={'name': 'U'},
bias_attr=False,
**self.p_info)
h = layers.sigmoid(
x=layers.elementwise_add(
x=temp_l, y=temp_r, **self.p_info),
**self.p_info)
......@@ -293,7 +290,7 @@ class RecurrentOpTest2(RecurrentOpTest1):
return rnn()
class RecurrentOpTest3(RecurrentOpTest1):
class RecurrentOpMultipleMemoryTest(RecurrentOpTest1):
'''
Test RNNOp with two memories
equation:
......@@ -310,8 +307,8 @@ class RecurrentOpTest3(RecurrentOpTest1):
class PySimpleRNN3(PyRNNBase):
def __init__(self, input_shape, output_shape):
super(RecurrentOpTest3.PySimpleRNN3, self).__init__(input_shape,
output_shape)
super(RecurrentOpMultipleMemoryTest.PySimpleRNN3, self).__init__(
input_shape, output_shape)
seq_len, batch_size, input_dim = input_shape
self.h_boot1 = np.random.normal(size=(batch_size,
......@@ -345,27 +342,27 @@ class RecurrentOpTest3(RecurrentOpTest1):
self.input_shape = (self.sent_len, self.batch_size, self.input_dim)
self.output_shape = (self.sent_len, self.batch_size, self.input_dim)
self.py_rnn = RecurrentOpTest3.PySimpleRNN3(self.input_shape,
self.output_shape)
self.py_rnn = RecurrentOpMultipleMemoryTest.PySimpleRNN3(
self.input_shape, self.output_shape)
self.output = mean(x=self.create_rnn_op(), **self.p_info)
self.output = layers.mean(x=self.create_rnn_op(), **self.p_info)
def create_rnn_op(self):
x = data(
x = layers.data(
shape=[self.sent_len, self.batch_size, self.input_dim],
data_type='float32',
name='x',
append_batch_size=False,
**self.p_info)
x.stop_gradient = False
h_boot1 = data(
h_boot1 = layers.data(
shape=[self.batch_size, self.input_dim],
data_type='float32',
name='h_boot1',
append_batch_size=False,
**self.p_info)
h_boot1.stop_gradient = False
h_boot2 = data(
h_boot2 = layers.data(
shape=[self.batch_size, self.input_dim],
data_type='float32',
name='h_boot2',
......@@ -373,15 +370,15 @@ class RecurrentOpTest3(RecurrentOpTest1):
**self.p_info)
h_boot2.stop_gradient = False
rnn = StaticRNN(main_program=self.main_program)
rnn = layers.StaticRNN(main_program=self.main_program)
with rnn.step():
h_pre1 = rnn.memory(init=h_boot1)
h_pre2 = rnn.memory(init=h_boot2)
x_t = rnn.step_input(x)
mem1 = scale(x=h_pre1, scale=1.0, **self.p_info)
mem2 = scale(x=h_pre2, scale=1.0, **self.p_info)
out = sums(input=[mem1, x_t, mem2], **self.p_info)
mem1 = layers.scale(x=h_pre1, scale=1.0, **self.p_info)
mem2 = layers.scale(x=h_pre2, scale=1.0, **self.p_info)
out = layers.sums(input=[mem1, x_t, mem2], **self.p_info)
rnn.update_memory(h_pre1, mem1)
rnn.update_memory(h_pre2, mem2)
......@@ -390,5 +387,70 @@ class RecurrentOpTest3(RecurrentOpTest1):
return rnn()
class RecurrentOpNoMemBootTest(RecurrentOpTest1):
'''
Test RNNOp with two memories
equation:
mem = x + mem_pre
y = mem
vars:
- x
memories:
- mem
outputs:
- y
'''
class PySimpleRNN4(PyRNNBase):
def __init__(self, input_shape, output_shape):
super(RecurrentOpNoMemBootTest.PySimpleRNN4, self).__init__(
input_shape, output_shape)
men_dim = input_shape
self.mems = np.zeros(shape=men_dim).astype("float32")
def step(self, step_id, x):
if step_id == 0:
pre_mem = np.zeros_like(x)
else:
pre_mem = self.mems[step_id - 1]
self.mems[step_id] = pre_mem + x
self.y[step_id] = self.mems[step_id]
input_dim = 1
batch_size = 1
sent_len = 2
def setUp(self):
self.setup_program()
self.data_field = {"x"}
self.input_shape = (self.sent_len, self.batch_size, self.input_dim)
self.output_shape = (self.sent_len, self.batch_size, self.input_dim)
self.py_rnn = RecurrentOpNoMemBootTest.PySimpleRNN4(self.input_shape,
self.output_shape)
self.output = layers.mean(x=self.create_rnn_op(), **self.p_info)
print self.main_program
def create_rnn_op(self):
x = layers.data(
shape=[self.sent_len, self.batch_size, self.input_dim],
data_type='float32',
name='x',
append_batch_size=False,
**self.p_info)
x.stop_gradient = False
rnn = layers.StaticRNN(main_program=self.main_program)
with rnn.step():
mem_pre = rnn.memory(shape=[-1, self.input_dim], batch_ref=x)
x_t = rnn.step_input(x)
mem = layers.elementwise_add(x=mem_pre, y=x_t, **self.p_info)
rnn.update_memory(mem_pre, mem)
rnn.output(mem)
return rnn()
if __name__ == '__main__':
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册