diff --git a/paddle/operators/rnn_memory_helper_op.cc b/paddle/operators/rnn_memory_helper_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f383faf5dd37304ac9cf2dde778fd149692a75e2
--- /dev/null
+++ b/paddle/operators/rnn_memory_helper_op.cc
@@ -0,0 +1,154 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "paddle/framework/op_registry.h"
+#include "paddle/framework/operator.h"
+
+namespace paddle {
+namespace operators {
+class RNNMemoryHelperOp : public framework::OperatorBase {
+ public:
+  RNNMemoryHelperOp(const std::string &type,
+                    const framework::VariableNameMap &inputs,
+                    const framework::VariableNameMap &outputs,
+                    const framework::AttributeMap &attrs)
+      : OperatorBase(type, inputs, outputs, attrs) {}
+  void Run(const framework::Scope &scope,
+           const platform::DeviceContext &dev_ctx) const override {
+    auto mem_var_name = Input("X");
+    auto *mem_var = scope.FindVar(mem_var_name);
+    PADDLE_ENFORCE(mem_var != nullptr,
+                   "Cannot find mem_var in scope, mem_var_name is %s",
+                   mem_var_name);
+
+    auto out_name = this->Output("Out");
+    auto *out_var = scope.FindVar(out_name);
+    PADDLE_ENFORCE(out_var != nullptr,
+                   "Cannot find out_var in scope, out_var_name is %s",
+                   out_name);
+
+    // Forward pass is a shallow copy: the output aliases the memory
+    // tensor's buffer and LoD instead of copying the data.
+    auto *out_tensor = out_var->GetMutable<framework::LoDTensor>();
+    auto &mem_tensor = mem_var->Get<framework::LoDTensor>();
+    out_tensor->ShareDataWith(mem_tensor);
+    out_tensor->set_lod(mem_tensor.lod());
+  }
+};
+
+class RNNMemoryHelperOpShapeInference : public framework::InferShapeBase {
+ public:
+  void operator()(framework::InferShapeContext *ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"), "");
+    PADDLE_ENFORCE(ctx->HasOutput("Out"), "");
+    ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
+    ctx->ShareLoD("X", /*->*/ "Out");
+  }
+};
+
+class RNNMemoryHelperOpInfoMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  RNNMemoryHelperOpInfoMaker(framework::OpProto *proto,
+                             framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X", "");
+    AddOutput("Out", "");
+    AddAttr<int>("data_type",
+                 "(int, default 5 (FP32)) "
+                 "Output data type")
+        .SetDefault(framework::DataType::FP32);
+    AddComment("");
+  }
+};
+
+class RNNMemoryHelperGradOp : public framework::OperatorBase {
+ public:
+  RNNMemoryHelperGradOp(const std::string &type,
+                        const framework::VariableNameMap &inputs,
+                        const framework::VariableNameMap &outputs,
+                        const framework::AttributeMap &attrs)
+      : OperatorBase(type, inputs, outputs, attrs) {}
+  void Run(const framework::Scope &scope,
+           const platform::DeviceContext &dev_ctx) const override {
+    auto out_grad_var_name = Input(framework::GradVarName("Out"));
+    auto *out_grad_var = scope.FindVar(out_grad_var_name);
+
+    auto in_grad_var_name = Output(framework::GradVarName("X"));
+    auto *in_grad_var = scope.FindVar(in_grad_var_name);
+    PADDLE_ENFORCE(in_grad_var != nullptr,
+                   "Cannot find in_grad_var in scope, name is %s",
+                   in_grad_var_name);
+
+    if (out_grad_var == nullptr) {
+      VLOG(5) << "Using fill constant 0 as starting gradient";
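+      // No gradient arrived for "Out", so fall back to a zero gradient
+      // with the shape and data type of the forward input, produced by
+      // an ad-hoc fill_constant op built and run below.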
+      auto in_var_name = Input("X");
+      auto *in_var = scope.FindVar(in_var_name);
+      auto &in_var_tensor = in_var->Get<framework::LoDTensor>();
+
+      framework::AttributeMap attrs;
+      attrs["data_type"] = framework::ToDataType(in_var_tensor.type());
+      attrs["shape"] = framework::vectorize2int(in_var_tensor.dims());
+      attrs["value"] = 0.0f;
+
+      auto zero_op = framework::OpRegistry::CreateOp(
+          "fill_constant", {}, {{"Out", {in_grad_var_name}}}, attrs);
+      zero_op->Run(scope, dev_ctx);
+    } else {
+      // A gradient is available; alias it into X@GRAD, mirroring the
+      // ShareDataWith used in the forward pass.
+      auto &out_grad_tensor = out_grad_var->Get<framework::LoDTensor>();
+      auto *in_grad_tensor = in_grad_var->GetMutable<framework::LoDTensor>();
+      in_grad_tensor->ShareDataWith(out_grad_tensor);
+      in_grad_tensor->set_lod(out_grad_tensor.lod());
+    }
+  }
+};
+
+class RNNMemoryHelperGradOpInfoMaker
+    : public framework::OpProtoAndCheckerMaker {
+ public:
+  RNNMemoryHelperGradOpInfoMaker(framework::OpProto *proto,
+                                 framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput(framework::GradVarName("Out"), "");
+    AddInput("X", "");
+    AddInput("Out", "");
+    AddOutput(framework::GradVarName("X"), "");
+    AddAttr<int>("data_type",
+                 "(int, default 5 (FP32)) "
+                 "Output data type")
+        .SetDefault(framework::DataType::FP32);
+    AddComment("");
+  }
+};
+
+class RNNMemoryHelperGradOpShapeInference : public framework::InferShapeBase {
+ public:
+  void operator()(framework::InferShapeContext *ctx) const override {
+    auto x_grad_name = framework::GradVarName("X");
+    auto out_grad_name = framework::GradVarName("Out");
+    PADDLE_ENFORCE(ctx->HasInput(out_grad_name), "");
+    PADDLE_ENFORCE(ctx->HasOutput(x_grad_name), "");
+    ctx->SetOutputDim(x_grad_name, ctx->GetInputDim(out_grad_name));
+    ctx->ShareLoD(out_grad_name, /*->*/ x_grad_name);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+REGISTER_OPERATOR(rnn_memory_helper, paddle::operators::RNNMemoryHelperOp,
+                  paddle::operators::RNNMemoryHelperOpInfoMaker,
+                  paddle::operators::RNNMemoryHelperOpShapeInference,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(rnn_memory_helper_grad,
+                  paddle::operators::RNNMemoryHelperGradOp,
+                  paddle::operators::RNNMemoryHelperGradOpInfoMaker,
+                  paddle::operators::RNNMemoryHelperGradOpShapeInference);
diff --git a/python/paddle/v2/framework/framework.py b/python/paddle/v2/framework/framework.py
index b3493fc378f4d866d16a6a0a739e0ccc8e44d97c..7da6f81359b07e0b73736124d6eec7a0f3fd3036 100644
--- a/python/paddle/v2/framework/framework.py
+++ b/python/paddle/v2/framework/framework.py
@@ -264,7 +264,9 @@ class Operator(object):
                 self.desc.set_attr(attr_name, attrs[attr_name])
 
         self.desc.check_attrs()
-        no_kernel_op_set = {'feed', 'fetch', 'save', 'load'}
+        no_kernel_op_set = {
+            'feed', 'fetch', 'save', 'load', 'rnn_memory_helper_grad'
+        }
         if type not in no_kernel_op_set:
             self.desc.infer_var_type(self.block.desc)
             self.desc.infer_shape(self.block.desc)
diff --git a/python/paddle/v2/framework/tests/test_rnn_memory_helper_op.py b/python/paddle/v2/framework/tests/test_rnn_memory_helper_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..731beff17cc96d26c2d9390a956c774b8676b179
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_rnn_memory_helper_op.py
@@ -0,0 +1,130 @@
+import unittest
+
+from paddle.v2.framework.framework import Program
+from paddle.v2.framework.executor import Executor
+from paddle.v2.framework.backward import append_backward_ops
+import numpy as np
+import paddle.v2.framework.core as core
+
+
+def create_tensor(np_data, place):
+    tensor = core.LoDTensor()
+    tensor.set(np_data, place)
+    return tensor
+
+
+class RNNMemoryHelperOpTest(unittest.TestCase):
+    def setUp(self):
+        self.program = Program()
+        self.place = core.CPUPlace()
+
+        self.X = self.program.global_block().create_var(
+            name='X', shape=[2, 3], dtype='float32')
+        self.Out = self.program.global_block().create_var(
+            name='Out', shape=[2, 3], dtype='float32')
+        self.program.global_block().append_op(
+            type='rnn_memory_helper',
+            inputs={"X": self.X},
+            outputs={"Out": self.Out},
+            attrs={})
+
+    def test_forward(self):
+        x_np = np.random.normal(size=(2, 3)).astype("float32")
+        self.feed_map = {'X': create_tensor(x_np, self.place)}
+        self.fetch_list = [self.Out]
+        exe = Executor(self.place)
+        out = exe.run(self.program,
+                      feed=self.feed_map,
+                      fetch_list=self.fetch_list)
+        # The forward op is an identity: Out must match the fed X.
+        self.assertTrue(np.allclose(np.array(out[0]), x_np, rtol=1e-5))
+
+
+class RNNMemoryHelperGradOpTest(unittest.TestCase):
+    def setUp(self):
+        self.program = Program()
+        self.place = core.CPUPlace()
+
+        self.input_names = ['X', 'Out', 'Out@GRAD']
+        self.input_vars = {
+            name: self.program.global_block().create_var(
+                name=name, shape=[2, 3], dtype='float32')
+            for name in self.input_names
+        }
+
+        self.output_names = ['X@GRAD']
+        self.output_vars = {
+            name: self.program.global_block().create_var(
+                name=name, shape=[2, 3], dtype='float32')
+            for name in self.output_names
+        }
+
+        self.program.global_block().append_op(
+            type='rnn_memory_helper_grad',
+            inputs=self.input_vars,
+            outputs=self.output_vars,
+            attrs={})
+
+    def test_backward(self):
+        self.feed_map = {
+            name: create_tensor(
+                np.random.normal(size=(2, 3)).astype("float32"), self.place)
+            for name in self.input_names
+        }
+        self.fetch_list = [self.output_vars['X@GRAD']]
+
+        exe = Executor(self.place)
+        out = exe.run(self.program,
+                      feed=self.feed_map,
+                      fetch_list=self.fetch_list)
+        # X@GRAD should carry Out@GRAD through when a gradient was fed.
+        self.assertTrue(
+            np.allclose(
+                np.array(out[0]),
+                np.array(self.feed_map['Out@GRAD']),
+                rtol=1e-5))
+
+
+class RNNMemoryHelperGradOpWithoutInputTest(unittest.TestCase):
+    def setUp(self):
+        self.program = Program()
+        self.fake_program = Program()
+        self.place = core.CPUPlace()
+
+        self.input_names = ['X', 'Out']
+        self.input_vars = {
+            name: self.program.global_block().create_var(
+                name=name, shape=[2, 3], dtype='float32')
+            for name in self.input_names
+        }
+        # Out@GRAD deliberately lives in a different program, so the grad op
+        # cannot find it in scope and must fall back to a zero gradient.
+        self.input_vars["Out@GRAD"] = \
+            self.fake_program.global_block().create_var(
+                name="Out@GRAD", shape=[2, 3], dtype='float32')
+
+        self.output_names = ['X@GRAD']
+        self.output_vars = {
+            name: self.program.global_block().create_var(
+                name=name, shape=[2, 3], dtype='float32')
+            for name in self.output_names
+        }
+
+        self.program.global_block().append_op(
+            type='rnn_memory_helper_grad',
+            inputs=self.input_vars,
+            outputs=self.output_vars,
+            attrs={})
+
+    def test_backward(self):
+        self.feed_map = {
+            name: create_tensor(
+                np.random.normal(size=(2, 3)).astype("float32"), self.place)
+            for name in ['X', 'Out']
+        }
+        self.fetch_list = [self.output_vars['X@GRAD']]
+
+        exe = Executor(self.place)
+        out = exe.run(self.program,
+                      feed=self.feed_map,
+                      fetch_list=self.fetch_list)
+        # With no Out@GRAD fed, the fill_constant fallback yields zeros.
+        self.assertTrue(
+            np.allclose(
+                np.array(out[0]),
+                np.zeros(shape=(2, 3)).astype("float32"),
+                rtol=1e-5))
+
+
+if __name__ == '__main__':
+    unittest.main()
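
For orientation, the semantics of the op pair reduce to the following pure-numpy sketch (an illustration inferred from the code above, not part of the patch; the function names are made up):

import numpy as np

def rnn_memory_helper(x):
    # Forward: identity. The real op shares the underlying buffer and LoD
    # rather than copying.
    return x

def rnn_memory_helper_grad(x, out_grad=None):
    # Backward: pass the incoming gradient through, or fall back to zeros
    # shaped like the forward input when no gradient arrived (this mirrors
    # the fill_constant branch in RNNMemoryHelperGradOp::Run).
    if out_grad is None:
        return np.zeros_like(x)
    return out_grad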