From b26f5050020ea14fca6c1c2c759aa269c6331177 Mon Sep 17 00:00:00 2001 From: "Yang Yang(Tony)" Date: Wed, 11 Apr 2018 14:04:30 -0700 Subject: [PATCH] remove net op and cond_op (#9663) * remove net op and cond_op * fix cpplint * fix dependency * delete backward_test; fix compile * disable batch_norm backward * rm test_net.py * make batchnorm test independent of backward.cc * make test_layer_norm_op independent of backward.cc * make test_layer_norm_op independent of backward.cc * delete unused code * clean up --- paddle/fluid/framework/CMakeLists.txt | 3 +- paddle/fluid/framework/backward.cc | 119 +-- paddle/fluid/framework/backward_test.cc | 918 ------------------ paddle/fluid/framework/prune_test.cc | 7 +- paddle/fluid/operators/CMakeLists.txt | 4 +- paddle/fluid/operators/cond_op.cc | 235 ----- paddle/fluid/operators/cond_op.h | 96 -- paddle/fluid/operators/minus_op.cc | 2 +- paddle/fluid/operators/net_op.cc | 103 -- paddle/fluid/operators/net_op.h | 130 --- paddle/fluid/operators/net_op_test.cc | 103 -- paddle/fluid/operators/prelu_op.cc | 3 +- paddle/fluid/operators/scale_op.cc | 3 +- paddle/fluid/operators/split_op.cc | 1 - paddle/fluid/pybind/pybind.cc | 49 +- paddle/fluid/recordio/chunk.cc | 4 +- paddle/fluid/recordio/header.cc | 3 + .../tests/unittests/test_batch_norm_op.py | 361 +++---- .../fluid/tests/unittests/test_cond_op.py | 128 --- .../tests/unittests/test_layer_norm_op.py | 231 ++--- .../paddle/fluid/tests/unittests/test_net.py | 53 - 21 files changed, 205 insertions(+), 2351 deletions(-) delete mode 100644 paddle/fluid/framework/backward_test.cc delete mode 100644 paddle/fluid/operators/cond_op.cc delete mode 100644 paddle/fluid/operators/cond_op.h delete mode 100644 paddle/fluid/operators/net_op.cc delete mode 100644 paddle/fluid/operators/net_op.h delete mode 100644 paddle/fluid/operators/net_op_test.cc delete mode 100644 python/paddle/fluid/tests/unittests/test_cond_op.py delete mode 100644 python/paddle/fluid/tests/unittests/test_net.py diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 3840bbe83b..77f459b4dd 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -79,8 +79,7 @@ add_custom_command(TARGET framework_py_proto POST_BUILD COMMENT "Copy generated python proto into directory paddle/fluid/proto." WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) -cc_library(backward SRCS backward.cc DEPS net_op) -cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context fill_constant_op) +cc_library(backward SRCS backward.cc DEPS operator) cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor) cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog) diff --git a/paddle/fluid/framework/backward.cc b/paddle/fluid/framework/backward.cc index 1314af2b3d..76133b5136 100644 --- a/paddle/fluid/framework/backward.cc +++ b/paddle/fluid/framework/backward.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/backward.h" -#include "paddle/fluid/operators/net_op.h" #include #include @@ -22,7 +21,6 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/net_op.h" namespace paddle { namespace framework { @@ -60,12 +58,7 @@ static inline std::unique_ptr CreateGradOp( if (grad_ops.size() == 1) { return std::move(grad_ops[0]); } else { - auto net_op = new operators::NetOp(); - for (auto& grad_op : grad_ops) { - net_op->AppendOp(std::move(grad_op)); - } - net_op->CompleteAddOp(); - return std::unique_ptr(net_op); + PADDLE_THROW("Unexpected Branch"); } } @@ -91,10 +84,7 @@ static bool AllInSet( } static std::unique_ptr NOP() { - auto net_op = new operators::NetOp(); - net_op->SetType("@NOP@"); - net_op->CompleteAddOp(); - return std::unique_ptr(net_op); + PADDLE_THROW("Unexpected Branch"); } // Get backward operator from a forward operator, a recursive implementation. @@ -136,110 +126,7 @@ static std::unique_ptr BackwardRecursive( } // Returned gradient network - auto net = std::unique_ptr(new operators::NetOp()); - - if (forwardOp.IsNetOp()) { - // Because forwardOp is a net op, it can static_cast. - auto& forwardNet = static_cast(forwardOp); - - // Map from output gradient variable name to operator's indices in - // backward net's ops_. That operator generates that variable. - std::unordered_map> dup_output_ops; - - size_t local_op_id = 0; - // reversely travel forwardNet and collect all duplicate outputs. - for (auto it = forwardNet.ops_.rbegin(); it != forwardNet.ops_.rend(); - ++it, ++local_op_id) { - auto& fwd = *it; - auto bwd = BackwardRecursive(*fwd, no_grad_names, grad_to_var, uniq_id); - ForEachVarName(bwd->Outputs(), - [&dup_output_ops, local_op_id](const std::string& out) { - dup_output_ops[out].emplace_back(local_op_id); - return false; - }); - net->AppendOp(std::move(bwd)); - } - // Get unique ID for this method. - auto uid = uniq_id++; - // TODO(dzh): more comment - // multiple operators which have the same output (y for example) may - // overwrite the same y variable when backward, special operations are token - // to handle this case. For each duplicate output, rename it to an alias - // (original name with a offset), append an `add` op for its operator, - // and finally sum all the alias variable to the final output variable y. - using Pos = std::pair>; - std::list insert_position; - for (auto& dup_output_op : dup_output_ops) { - const std::string& name = dup_output_op.first; - // duplicate @Empty@ don't need to be added - if (name == kEmptyVarName) continue; - - auto& dup_op = dup_output_op.second; - // no duplicate output - if (dup_op.size() == 1) continue; - - // process the duplicate outputs - std::vector dup_outputs; - for (size_t i = 0; i < dup_op.size(); ++i) { - // rename each duplicate output to an alias - auto op_offset = dup_op[i]; - dup_outputs.push_back(name + "@RENAME@" + std::to_string(uid) + "@" + - std::to_string(i)); - net->ops_[op_offset]->Rename(name, dup_outputs.back()); - } - // collect all the offset for each alias, - // insert a sum operator to add all aliases to output - insert_position.push_back( - {dup_op.back(), - OpRegistry::CreateOp("sum", {{"X", dup_outputs}}, {{"Out", {name}}}, - AttributeMap{})}); - } - - // make sure the inserted `sum` ops follow the BFS order. 
- insert_position.sort( - [](const Pos& l, const Pos& r) { return l.first > r.first; }); - - for (auto& pos : insert_position) { - net->InsertOp(pos.first + 1, std::move(pos.second)); - } - } else { - std::unique_ptr grad_op( - CreateGradOp(forwardOp, no_grad_names, grad_to_var)); - - ForEachVarName(grad_op->Inputs(), [&no_grad_names, &net, &grad_op]( - const std::string& grad_input) { - if (no_grad_names.count(grad_input)) { - // +1 for \0 - std::string prefix = grad_input.substr( - 0, grad_input.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1); - grad_op->Rename(grad_input, prefix + kZeroVarSuffix); - - // If part of input gradient of that operator is not calculated, fill - // zero variables to that input gradient. - net->AppendOp(OpRegistry::CreateOp("fill_zeros_like", {{"X", {prefix}}}, - {{"Out", {grad_input}}}, - AttributeMap{})); - } - return false; - }); - - ForEachVarName(grad_op->Outputs(), - [&no_grad_names, &grad_op](const std::string& grad_output) { - if (no_grad_names.count(grad_output)) { - grad_op->Rename(grad_output, kEmptyVarName); - } - return false; - }); - - if (net->ops_.empty()) { // Current no aux op is added to network - return grad_op; - } - net->AppendOp(std::move(grad_op)); - } - net->SetType("@GENERATED_BACKWARD@"); - net->CompleteAddOp(); - return std::unique_ptr( - static_cast(net.release())); + PADDLE_THROW("Unexpected Branch"); } // See header for comments diff --git a/paddle/fluid/framework/backward_test.cc b/paddle/fluid/framework/backward_test.cc deleted file mode 100644 index cc1f871360..0000000000 --- a/paddle/fluid/framework/backward_test.cc +++ /dev/null @@ -1,918 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/fluid/framework/backward.h" - -#include -#include "paddle/fluid/framework/block_desc.h" -#include "paddle/fluid/framework/op_desc.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/var_desc.h" -#include "paddle/fluid/operators/net_op.h" - -USE_NO_KERNEL_OP(fill_constant); - -namespace paddle { -namespace framework { - -using DeviceContext = platform::DeviceContext; - -class NoneOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(framework::InferShapeContext *ctx) const override {} -}; - -template -class NoneKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &context) const override {} -}; - -class RowWiseAddOpMaker : public OpProtoAndCheckerMaker { - public: - RowWiseAddOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input X of Add"); - AddInput("b", "Bias of Add"); - AddOutput("Out", "Out of Add"); - AddComment("Add Op"); - } -}; - -class RowWiseAddGradMaker : public SingleGradOpDescMaker { - public: - using SingleGradOpDescMaker::SingleGradOpDescMaker; - - protected: - std::unique_ptr Apply() const override { - auto grad_op = new OpDesc(); - grad_op->SetInput(GradVarName("Out"), OutputGrad("Out")); - grad_op->SetOutput(GradVarName("X"), InputGrad("X")); - grad_op->SetOutput(GradVarName("b"), InputGrad("b")); - grad_op->SetType("rowwise_add_grad"); - return std::unique_ptr(grad_op); - } -}; - -class MulOpMaker : public OpProtoAndCheckerMaker { - public: - MulOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "A"); - AddInput("Y", "B"); - AddOutput("Out", "Out"); - AddAttr("x_num_col_dims", "").SetDefault(1).EqualGreaterThan(1); - AddAttr("y_num_col_dims", "").SetDefault(1).EqualGreaterThan(1); - AddComment("Mul"); - } -}; - -class SigmoidOpMaker : public OpProtoAndCheckerMaker { - public: - SigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "X"); - AddOutput("Out", "Y"); - AddComment("Sigmoid"); - } -}; - -class NoGradOpMaker : public OpProtoAndCheckerMaker { - public: - NoGradOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "X input"); - AddOutput("Out", "Y output"); - AddComment("NoGradOp, same input output. 
no Grad"); - } -}; - -class FcOp : public operators::NetOp { - public: - FcOp(const std::string &type, const VariableNameMap &inputs, - const VariableNameMap &outputs, const AttributeMap &attrs) - : NetOp(type, inputs, outputs, attrs) { - AppendOp(OpRegistry::CreateOp( - "mul", {{"X", {Input("X")}}, {"Y", {Input("W")}}}, - {{"Out", {Output("mul_result")}}}, AttributeMap{})); - auto input_b = Inputs("b"); - std::string before_act = "mul_result"; - if (input_b.size() != 0) { - AppendOp(OpRegistry::CreateOp( - "rowwise_add", {{"X", {Output("mul_result")}}, {"b", {input_b[0]}}}, - {{"Out", {Output("add_result")}}}, AttributeMap{})); - before_act = "add_result"; - } else { - auto out_varname = Output("add_result"); - if (out_varname != kEmptyVarName) { - this->Rename(out_varname, kEmptyVarName); - } - } - - AppendOp(OpRegistry::CreateOp("sigmoid", {{"X", {Output(before_act)}}}, - {{"Out", {Output("Out")}}}, AttributeMap{})); - CompleteAddOp(false); - } -}; - -class FcOpMaker : public OpProtoAndCheckerMaker { - public: - FcOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "x"); - AddInput("W", "w"); - AddInput("b", "b"); - AddOutput("mul_result", "").AsIntermediate(); - AddOutput("add_result", "").AsIntermediate(); - AddOutput("Out", ""); - AddComment(""); - } -}; - -class ManyOutputOpMaker : public OpProtoAndCheckerMaker { - public: - ManyOutputOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("x", "x"); - AddOutput("y", "y"); - AddOutput("z", "z"); - AddComment(""); - } -}; - -class FillZeroOpMaker : public OpProtoAndCheckerMaker { - public: - FillZeroOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "x"); - AddOutput("Out", "out"); - AddComment(""); - } -}; - -class SumOpMaker : public framework::OpProtoAndCheckerMaker { - public: - SumOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "the input tensors of sum operator.").AsDuplicable(); - AddOutput("Out", "the output tensor of sum operator."); - AddComment(""); - } -}; - -class MultInOutOpMaker : public OpProtoAndCheckerMaker { - public: - MultInOutOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "x"); - AddInput("H", "h"); - AddOutput("Y", "y"); - AddOutput("Z", "z"); - AddComment(""); - } -}; - -class MinusGradOpDescMaker : public GradOpDescMakerBase { - public: - using GradOpDescMakerBase::GradOpDescMakerBase; - - std::vector> operator()() const override { - std::vector> retv; - auto x_g = InputGrad("X"); - if (!x_g.empty()) { - auto *op_desc = new OpDesc(); - op_desc->SetType("scale"); - op_desc->SetInput("X", OutputGrad("Out")); - op_desc->SetOutput("Out", x_g); - op_desc->SetAttr("scale", 1.0f); - retv.emplace_back(op_desc); - } - - auto y_g = InputGrad("Y"); - if (!y_g.empty()) { - auto *op_desc = new OpDesc(); - op_desc->SetType("scale"); - op_desc->SetInput("X", OutputGrad("Out")); - op_desc->SetOutput("Out", y_g); - op_desc->SetAttr("scale", -1.0f); - retv.emplace_back(op_desc); - } - return retv; - } -}; - -class MinusOpMaker : public OpProtoAndCheckerMaker { - public: - MinusOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", ""); - AddInput("Y", ""); - AddOutput("Out", ""); - AddComment("minus for unittest"); - } -}; -} // namespace framework -} // 
namespace paddle - -namespace f = paddle::framework; -namespace ops = paddle::operators; -using EnforceNotMet = paddle::platform::EnforceNotMet; -// rowwise_add -REGISTER_OPERATOR(rowwise_add, f::NoneOp, f::RowWiseAddOpMaker, - f::RowWiseAddGradMaker); -REGISTER_OP_CPU_KERNEL(rowwise_add, - f::NoneKernel); -REGISTER_OPERATOR(rowwise_add_grad, f::NoneOp); -REGISTER_OP_CPU_KERNEL(rowwise_add_grad, - f::NoneKernel); -// mul -REGISTER_OP(mul, f::NoneOp, f::MulOpMaker, mul_grad, f::NoneOp); -REGISTER_OP_CPU_KERNEL(mul, f::NoneKernel); -REGISTER_OP_CPU_KERNEL(mul_grad, - f::NoneKernel); -// sigmoid -REGISTER_OP(sigmoid, f::NoneOp, f::SigmoidOpMaker, sigmoid_grad, f::NoneOp); -REGISTER_OP_CPU_KERNEL(sigmoid, - f::NoneKernel); -REGISTER_OP_WITHOUT_GRADIENT(nograd, f::NoneOp, f::NoGradOpMaker); -// fill_zeros_like -REGISTER_OP_WITHOUT_GRADIENT(fill_zeros_like, f::NoneOp, f::FillZeroOpMaker); -REGISTER_OP_CPU_KERNEL(fill_zeros_like, - f::NoneKernel); -// sum -REGISTER_OP(sum, f::NoneOp, f::SumOpMaker, sum_grad, f::NoneOp); -REGISTER_OP_CPU_KERNEL(sum, f::NoneKernel); -REGISTER_OP_CPU_KERNEL(sum_grad, - f::NoneKernel); -// fc -REGISTER_OP_WITHOUT_GRADIENT(fc, f::FcOp, f::FcOpMaker); -// many_output_op -REGISTER_OP(many_output_op, f::NoneOp, f::ManyOutputOpMaker, - many_output_op_grad, f::NoneOp); -// mult_in_out -REGISTER_OP(mult_in_out, f::NoneOp, f::MultInOutOpMaker, mult_in_out_grad, - f::NoneOp); -REGISTER_OP_CPU_KERNEL(mult_in_out, - f::NoneKernel); -REGISTER_OP_CPU_KERNEL(mult_in_out_grad, - f::NoneKernel); -// minus -REGISTER_OPERATOR(minus, f::NoneOp, f::MinusOpMaker, f::MinusGradOpDescMaker); -REGISTER_OP_CPU_KERNEL(minus, f::NoneKernel); -// scale -REGISTER_OPERATOR(scale, f::NoneOp); -REGISTER_OP_CPU_KERNEL(scale, f::NoneKernel); - -TEST(Backward, simple_op_not_need_grad) { - auto fwd = - f::OpRegistry::CreateOp("rowwise_add", {{"X", {"x"}}, {"b", {"b"}}}, - {{"Out", {"out"}}}, f::AttributeMap{}); - ASSERT_NE(fwd, nullptr); - auto gop = f::Backward(*fwd, {"x"}); - ASSERT_EQ(gop->Output(f::GradVarName("X")), f::kEmptyVarName); - - auto no_input_gop = f::Backward(*fwd, {"x", "b"}); - ASSERT_NE(no_input_gop, nullptr); - ASSERT_TRUE(no_input_gop->IsNetOp()); - ASSERT_EQ(0UL, static_cast(no_input_gop.get())->ops_.size()); -} - -TEST(Backward, net_fc_backward_normal) { - std::shared_ptr fwd = - f::OpRegistry::CreateOp("fc", {{"X", {"x"}}, {"W", {"w"}}, {"b", {"b"}}}, - {{"mul_result", {"mul_res"}}, - {"add_result", {"add_re"}}, - {"Out", {"out"}}}, - f::AttributeMap{}); - ASSERT_NE(fwd, nullptr); - std::shared_ptr gop = - f::Backward(*fwd, std::unordered_set{}); - ASSERT_TRUE(gop->IsNetOp()); - auto net = static_cast(gop.get()); - - ASSERT_NO_THROW(net->DebugString()); - - ASSERT_EQ(3UL, net->ops_.size()); - - f::OperatorBase &d_sigmoid = *net->ops_[0]; - ASSERT_EQ("sigmoid_grad", d_sigmoid.Type()); - - f::OperatorBase &d_add = *net->ops_[1]; - ASSERT_EQ("rowwise_add_grad", d_add.Type()); - - f::OperatorBase &d_mul = *net->ops_[2]; - ASSERT_EQ("mul_grad", d_mul.Type()); -} - -TEST(Backward, net_fc_backward_not_have_b) { - std::shared_ptr fwd = - f::OpRegistry::CreateOp("fc", {{"X", {"x"}}, {"W", {"w"}}, {"b", {}}}, - {{"mul_result", {"mul_res"}}, - {"add_result", {"add_res"}}, - {"Out", {"tmp"}}}, - f::AttributeMap{}); - ASSERT_NE(fwd, nullptr); - std::shared_ptr gop = - f::Backward(*fwd, std::unordered_set{}); - ASSERT_TRUE(gop->IsNetOp()); - auto net = static_cast(gop.get()); - - ASSERT_NO_THROW(net->DebugString()); - - ASSERT_EQ(2UL, net->ops_.size()); - - f::OperatorBase &d_sigmoid = 
*net->ops_[0]; - ASSERT_EQ("sigmoid_grad", d_sigmoid.Type()); - - f::OperatorBase &d_mul = *net->ops_[1]; - ASSERT_EQ("mul_grad", d_mul.Type()); -} - -TEST(Backward, net_input_of_network_not_need_grad) { - ops::NetOp net; - net.AppendOp(f::OpRegistry::CreateOp( - "fc", {{"X", {"x"}}, {"W", {"W1"}}, {"b", {"b1"}}}, - {{"mul_result", {"mul_tmp_0"}}, - {"add_result", {"add_tmp_0"}}, - {"Out", {"hidden0"}}}, - f::AttributeMap{})); - net.AppendOp(f::OpRegistry::CreateOp( - "fc", {{"X", {"hidden0"}}, {"W", {"W2"}}, {"b", {"b2"}}}, - {{"mul_result", {"mul_tmp_1"}}, - {"add_result", {"add_tmp_1"}}, - {"Out", {"hidden1"}}}, - f::AttributeMap{})); - net.CompleteAddOp(); - auto bwd = Backward(net, {"x"}); // x@GRAD is not need. - ASSERT_TRUE(bwd->IsNetOp()); - auto bwd_net = static_cast(bwd.get()); - - auto output_vars = bwd_net->OutputVars(true); - std::unordered_set all_outputs = - std::unordered_set(output_vars.begin(), output_vars.end()); - all_outputs.erase(f::kEmptyVarName); - - for (auto &out : {"W1", "b1", "hidden0", "W2", "b2"}) { - ASSERT_NE(all_outputs.find(f::GradVarName(out)), all_outputs.end()); - } - - // Not Generated X - ASSERT_EQ(all_outputs.find(f::GradVarName("X")), all_outputs.end()); - - ASSERT_EQ(2UL, bwd_net->ops_.size()); - ASSERT_TRUE(bwd_net->ops_[1]->IsNetOp()); - auto first_fc_grad = static_cast(bwd_net->ops_[1].get()); - ASSERT_EQ(3UL, first_fc_grad->ops_.size()); - ASSERT_EQ(f::kEmptyVarName, - first_fc_grad->ops_[2]->Output(f::GradVarName("X"))); -} - -TEST(Backward, net_shared_weight) { - ops::NetOp net; - net.AppendOp(f::OpRegistry::CreateOp("mul", {{"X", {"x"}}, {"Y", {"w"}}}, - {{"Out", {"out"}}}, f::AttributeMap{})); - net.AppendOp(f::OpRegistry::CreateOp("mul", {{"X", {"out"}}, {"Y", {"w"}}}, - {{"Out", {"FinalOut"}}}, - f::AttributeMap{})); - net.CompleteAddOp(); - - auto bwd = f::Backward(net, std::unordered_set{}); - ASSERT_TRUE(bwd->IsNetOp()); - auto bwd_net = static_cast(bwd.get()); - ASSERT_EQ(3UL, bwd_net->ops_.size()); - ASSERT_EQ("sum", bwd_net->ops_[2]->Type()); -} - -TEST(Backward, op_all_input_are_not_need) { - auto fwd = - f::OpRegistry::CreateOp("rowwise_add", {{"X", {"x"}}, {"b", {"b"}}}, - {{"Out", {"out"}}}, f::AttributeMap{}); - auto backward = f::Backward(*fwd, {"x", "b"}); - ASSERT_TRUE(backward->IsNetOp()); - auto net = static_cast(backward.get()); - ASSERT_TRUE(net->ops_.empty()); -} - -TEST(Backward, op_all_output_are_not_need) { - auto fwd = - f::OpRegistry::CreateOp("rowwise_add", {{"X", {"x"}}, {"b", {"b"}}}, - {{"Out", {"out"}}}, f::AttributeMap{}); - auto backward = f::Backward(*fwd, {"out"}); - ASSERT_TRUE(backward->IsNetOp()); - auto net = static_cast(backward.get()); - ASSERT_TRUE(net->ops_.empty()); -} - -TEST(Backward, op_part_of_output_are_not_need) { - auto fwd = - f::OpRegistry::CreateOp("many_output_op", {{"x", {"X"}}}, - {{"y", {"Y"}}, {"z", {"Z"}}}, f::AttributeMap{}); - auto backward = f::Backward(*fwd, {"Z"}); - ASSERT_TRUE(backward->IsNetOp()); - auto net = static_cast(backward.get()); - ASSERT_EQ(net->ops_.size(), 2UL); - - auto &fill_zero = *net->ops_[0]; - ASSERT_EQ("fill_zeros_like", fill_zero.Type()); - ASSERT_EQ(1UL, fill_zero.Inputs("X").size()); - ASSERT_EQ("Z", fill_zero.Input("X")); - ASSERT_EQ(1UL, fill_zero.Outputs("Out").size()); - ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, fill_zero.Output("Out")); - - auto &d_many_out = *net->ops_[1]; - ASSERT_EQ("many_output_op_grad", d_many_out.Type()); - ASSERT_EQ(1UL + 2UL + 2UL, d_many_out.Inputs().size()); // I/O/OG - ASSERT_EQ(std::string("Z") + 
f::kZeroVarSuffix, - d_many_out.Input(f::GradVarName("z"))); - ASSERT_EQ(f::GradVarName("Y"), d_many_out.Input(f::GradVarName("y"))); - ASSERT_EQ(f::GradVarName("X"), d_many_out.Output(f::GradVarName("x"))); -} - -TEST(Backward, op_part_of_input_are_not_need) { - auto fwd = f::OpRegistry::CreateOp("mul", {{"X", {"a"}}, {"Y", {"b"}}}, - {{"Out", {"out"}}}, f::AttributeMap{}); - auto backward = f::Backward(*fwd, {"a"}); - auto &grad_mul = *backward; - ASSERT_EQ(grad_mul.Type(), "mul_grad"); - ASSERT_EQ(grad_mul.Inputs().size(), 2UL + 1UL + 1UL); - ASSERT_EQ(grad_mul.Outputs().size(), 2UL); - ASSERT_EQ(grad_mul.Output(f::GradVarName("X")), f::kEmptyVarName); - ASSERT_EQ(grad_mul.Output(f::GradVarName("Y")), f::GradVarName("b")); - ASSERT_EQ(grad_mul.Input(f::GradVarName("Out")), f::GradVarName("out")); - ASSERT_EQ(grad_mul.Input("X"), "a"); - ASSERT_EQ(grad_mul.Input("Y"), "b"); - ASSERT_EQ(grad_mul.Input("Out"), "out"); -} - -TEST(Backward, linear_net_intermediate_variable_has_no_grad) { - ops::NetOp net; - net.AppendOp(f::OpRegistry::CreateOp( - "fc", {{"X", {"x1"}}, {"W", {"w1"}}, {"b", {"b1"}}}, - {{"mul_result", {"mul_out1"}}, - {"add_result", {"add_out1"}}, - {"Out", {"out1"}}}, - f::AttributeMap{})); - net.AppendOp(f::OpRegistry::CreateOp( - "fc", {{"X", {"out1"}}, {"W", {"w2"}}, {"b", {"b2"}}}, - {{"mul_result", {"mul_out2"}}, - {"add_result", {"tmp_out2"}}, - {"Out", {"out2"}}}, - f::AttributeMap{})); - net.AppendOp(f::OpRegistry::CreateOp( - "fc", {{"X", {"out2"}}, {"W", {"w3"}}, {"b", {"b3"}}}, - {{"mul_result", {"mul_out3"}}, - {"add_result", {"tmp_out3"}}, - {"Out", {"out3"}}}, - f::AttributeMap{})); - net.CompleteAddOp(); - - auto backward = f::Backward(net, {"mul_out2", "tmp_out2", "out2"}); - ASSERT_TRUE(backward->IsNetOp()); - auto bwd_net = static_cast(backward.get()); - ASSERT_EQ(bwd_net->ops_.size(), 3UL); - auto &grad_fc = *bwd_net->ops_[0]; - - const char *all = paddle::operators::NetOp::kAll; - EXPECT_EQ(grad_fc.Inputs(all).size(), - 2UL /* external input number */ - + 1UL /* external output number*/ - + 1UL /* number of gradient of external output*/ - + 2UL /* internal variable number*/ - ); - EXPECT_EQ(grad_fc.Outputs(all).size(), - 2UL /* input number of mul*/ - + 2UL /* input number of rowwise_add*/ - + 1UL /* input number of sigmod */ - - 1UL /* out2 is not needed*/); - EXPECT_EQ(bwd_net->ops_[1]->Inputs(all).size(), 0UL); - EXPECT_EQ(bwd_net->ops_[1]->Outputs(all).size(), 0UL); - EXPECT_EQ(bwd_net->ops_[2]->Inputs(all).size(), 0UL); - EXPECT_EQ(bwd_net->ops_[2]->Outputs(all).size(), 0UL); -} - -TEST(Backward, simple_single_op) { - f::ProgramDesc program; - f::BlockDesc *block = program.MutableBlock(0); - - f::OpDesc *op = block->AppendOp(); - op->SetType("rowwise_add"); - op->SetInput("X", {"x"}); - op->SetInput("b", {"b"}); - op->SetOutput("Out", {"out"}); - - auto target = f::VarDesc("out"); - target.SetShape({1}); - auto var_to_grad = - AppendBackward(program, target, std::unordered_set{}); - - ASSERT_EQ(block->AllOps().size(), 3UL); - f::OpDesc *fill_op = block->AllOps()[1]; - EXPECT_EQ(fill_op->Type(), "fill_constant"); - - f::OpDesc *grad_op = block->AllOps()[2]; - EXPECT_EQ(grad_op->Type(), "rowwise_add_grad"); - ASSERT_EQ(grad_op->InputNames().size(), 1UL); - ASSERT_EQ(grad_op->OutputNames().size(), 2UL); - EXPECT_EQ(grad_op->Input(f::GradVarName("Out")), - std::vector({f::GradVarName("out")})); - EXPECT_EQ(grad_op->Output(f::GradVarName("X")), - std::vector({f::GradVarName("x")})); - EXPECT_EQ(grad_op->Output(f::GradVarName("b")), - 
std::vector({f::GradVarName("b")})); - - EXPECT_EQ(var_to_grad.size(), 3UL); - EXPECT_EQ(var_to_grad.at("b"), f::GradVarInfo(f::GradVarName("b"), 0, 2)); - EXPECT_EQ(var_to_grad.at("x"), f::GradVarInfo(f::GradVarName("x"), 0, 2)); - - EXPECT_TRUE(block->HasVar(f::GradVarName("b"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("x"))); -} - -TEST(Backward, default_attribute) { - f::ProgramDesc program; - f::BlockDesc *block = program.MutableBlock(0); - f::OpDesc *op = block->AppendOp(); - op->SetType("mul"); - op->SetInput("X", {"x"}); - op->SetInput("Y", {"y"}); - op->SetOutput("Out", {"out"}); - op->CheckAttrs(); - - auto target = f::VarDesc("out"); - target.SetShape({1}); - AppendBackward(program, target, std::unordered_set{}); - - ASSERT_EQ(block->AllOps().size(), 3UL); - EXPECT_EQ(boost::get(op->GetAttr("x_num_col_dims")), 1); - EXPECT_EQ(boost::get(op->GetAttr("y_num_col_dims")), 1); - - f::OpDesc *fill_op = block->AllOps()[1]; - EXPECT_EQ(fill_op->Type(), "fill_constant"); - - f::OpDesc *grad_op = block->AllOps()[2]; - ASSERT_EQ(grad_op->Type(), "mul_grad"); - EXPECT_EQ(boost::get(grad_op->GetAttr("x_num_col_dims")), 1); - EXPECT_EQ(boost::get(grad_op->GetAttr("y_num_col_dims")), 1); -} - -TEST(Backward, simple_mult_op) { - f::ProgramDesc program; - f::BlockDesc *block = program.MutableBlock(0); - f::OpDesc *op1 = block->AppendOp(); - op1->SetType("rowwise_add"); - op1->SetInput("X", {"x1"}); - op1->SetInput("b", {"b1"}); - op1->SetOutput("Out", {"out1"}); - - f::OpDesc *op2 = block->AppendOp(); - op2->SetType("mul"); - op2->SetInput("X", {"out1"}); - op2->SetInput("Y", {"y2"}); - op2->SetOutput("Out", {"out2"}); - - f::OpDesc *op3 = block->AppendOp(); - op3->SetType("rowwise_add"); - op3->SetInput("X", {"out2"}); - op3->SetInput("b", {"b3"}); - op3->SetOutput("Out", {"out3"}); - - auto target = f::VarDesc("out3"); - target.SetShape({1}); - size_t forward_len = block->AllOps().size(); - auto var_to_grad = - AppendBackward(program, target, std::unordered_set{}); - - ASSERT_EQ(block->AllOps().size(), 6UL + 1); - f::OpDesc *fill_op = block->AllOps()[forward_len]; - EXPECT_EQ(fill_op->Type(), "fill_constant"); - - f::OpDesc *grad_op1 = block->AllOps()[6]; - EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad"); - ASSERT_EQ(grad_op1->InputNames().size(), 1UL); - ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); - EXPECT_EQ(grad_op1->Input(f::GradVarName("Out")), - std::vector({f::GradVarName("out1")})); - EXPECT_EQ(grad_op1->Output(f::GradVarName("X")), - std::vector({f::GradVarName("x1")})); - EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), - std::vector({f::GradVarName("b1")})); - - f::OpDesc *grad_op2 = block->AllOps()[5]; - EXPECT_EQ(grad_op2->Type(), "mul_grad"); - ASSERT_EQ(grad_op2->InputNames().size(), 4UL); - ASSERT_EQ(grad_op2->OutputNames().size(), 2UL); - EXPECT_EQ(grad_op2->Input("X"), std::vector({"out1"})); - EXPECT_EQ(grad_op2->Input("Y"), std::vector({"y2"})); - EXPECT_EQ(grad_op2->Input("Out"), std::vector({"out2"})); - EXPECT_EQ(grad_op2->Input(f::GradVarName("Out")), - std::vector({f::GradVarName("out2")})); - EXPECT_EQ(grad_op2->Output(f::GradVarName("X")), - std::vector({f::GradVarName("out1")})); - EXPECT_EQ(grad_op2->Output(f::GradVarName("Y")), - std::vector({f::GradVarName("y2")})); - - f::OpDesc *grad_op3 = block->AllOps()[4]; - EXPECT_EQ(grad_op3->Type(), "rowwise_add_grad"); - ASSERT_EQ(grad_op3->InputNames().size(), 1UL); - ASSERT_EQ(grad_op3->OutputNames().size(), 2UL); - EXPECT_EQ(grad_op3->Input(f::GradVarName("Out")), - std::vector({f::GradVarName("out3")})); - 
EXPECT_EQ(grad_op3->Output(f::GradVarName("X")), - std::vector({f::GradVarName("out2")})); - EXPECT_EQ(grad_op3->Output(f::GradVarName("b")), - std::vector({f::GradVarName("b3")})); - - EXPECT_EQ(var_to_grad.size(), 7UL); - EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6)); - EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6)); - EXPECT_EQ(var_to_grad.at("out1"), - f::GradVarInfo(f::GradVarName("out1"), 0, 5)); - EXPECT_EQ(var_to_grad.at("y2"), f::GradVarInfo(f::GradVarName("y2"), 0, 5)); - EXPECT_EQ(var_to_grad.at("out2"), - f::GradVarInfo(f::GradVarName("out2"), 0, 4)); - EXPECT_EQ(var_to_grad.at("b3"), f::GradVarInfo(f::GradVarName("b3"), 0, 4)); - - EXPECT_TRUE(block->HasVar(f::GradVarName("x1"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("b1"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("out1"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("y2"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("out2"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("b3"))); -} - -TEST(Backward, intermedia_var_no_grad) { - f::ProgramDesc program; - f::BlockDesc *block = program.MutableBlock(0); - f::OpDesc *op1 = block->AppendOp(); - op1->SetType("rowwise_add"); - op1->SetInput("X", {"x1"}); - op1->SetInput("b", {"b1"}); - op1->SetOutput("Out", {"out1"}); - - f::OpDesc *op2 = block->AppendOp(); - op2->SetType("mul"); - op2->SetInput("X", {"x2"}); - op2->SetInput("Y", {"y2"}); - op2->SetOutput("Out", {"out2"}); - - f::OpDesc *op3 = block->AppendOp(); - op3->SetType("rowwise_add"); - op3->SetInput("X", {"out2"}); - op3->SetInput("b", {"b3"}); - op3->SetOutput("Out", {"out3"}); - - f::OpDesc *op4 = block->AppendOp(); - op4->SetType("mul"); - op4->SetInput("X", {"out1"}); - op4->SetInput("Y", {"out3"}); - op4->SetOutput("Out", {"out4"}); - - auto target = f::VarDesc("out4"); - target.SetShape({1}); - size_t forward_len = block->AllOps().size(); - auto var_to_grad = AppendBackward(program, target, {"out3"}); - - ASSERT_EQ(block->AllOps().size(), 7UL); - f::OpDesc *fill_op = block->AllOps()[forward_len]; - EXPECT_EQ(fill_op->Type(), "fill_constant"); - - f::OpDesc *grad_op1 = block->AllOps()[6]; - EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad"); - ASSERT_EQ(grad_op1->InputNames().size(), 1UL); - ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); - EXPECT_EQ(grad_op1->Input(f::GradVarName("Out")), - std::vector({f::GradVarName("out1")})); - EXPECT_EQ(grad_op1->Output(f::GradVarName("X")), - std::vector({f::GradVarName("x1")})); - EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), - std::vector({f::GradVarName("b1")})); - - f::OpDesc *grad_op4 = block->AllOps()[5]; - EXPECT_EQ(grad_op4->Type(), "mul_grad"); - ASSERT_EQ(grad_op4->InputNames().size(), 4UL); - ASSERT_EQ(grad_op4->OutputNames().size(), 2UL); - EXPECT_EQ(grad_op4->Input("X"), std::vector({"out1"})); - EXPECT_EQ(grad_op4->Input("Y"), std::vector({"out3"})); - EXPECT_EQ(grad_op4->Input("Out"), std::vector({"out4"})); - EXPECT_EQ(grad_op4->Input(f::GradVarName("Out")), - std::vector({f::GradVarName("out4")})); - EXPECT_EQ(grad_op4->Output(f::GradVarName("X")), - std::vector({f::GradVarName("out1")})); - EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), std::vector()); - - EXPECT_EQ(var_to_grad.size(), 4UL); - EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6)); - EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6)); - EXPECT_EQ(var_to_grad.at("out1"), - f::GradVarInfo(f::GradVarName("out1"), 0, 5)); - - EXPECT_TRUE(block->HasVar(f::GradVarName("x1"))); - 
EXPECT_TRUE(block->HasVar(f::GradVarName("b1"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("out1"))); -} - -TEST(Backward, var_no_grad) { - f::ProgramDesc program; - f::BlockDesc *block = program.MutableBlock(0); - f::OpDesc *op1 = block->AppendOp(); - op1->SetType("mult_in_out"); - op1->SetInput("X", {"x1"}); - op1->SetInput("H", {"h1"}); - op1->SetOutput("Y", {"y1"}); - op1->SetOutput("Z", {"z1"}); - - f::OpDesc *op2 = block->AppendOp(); - op2->SetType("mult_in_out"); - op2->SetInput("X", {"y1"}); - op2->SetInput("H", {"z1"}); - op2->SetOutput("Y", {"y2"}); - op2->SetOutput("Z", {"z2"}); - - auto target = f::VarDesc("z2"); - target.SetShape({1}); - size_t forward_len = block->AllOps().size(); - auto var_to_grad = AppendBackward(program, target, {"z1"}); - - ASSERT_EQ(block->AllOps().size(), 6UL); - f::OpDesc *fill_op = block->AllOps()[forward_len]; - EXPECT_EQ(fill_op->Type(), "fill_constant"); - - f::OpDesc *grad_op2 = block->AllOps()[3]; - ASSERT_EQ(grad_op2->Type(), "mult_in_out_grad"); - ASSERT_EQ(grad_op2->InputNames().size(), 6UL); - ASSERT_EQ(grad_op2->OutputNames().size(), 2UL); - EXPECT_EQ(grad_op2->Input("X"), std::vector({"y1"})); - EXPECT_EQ(grad_op2->Input("H"), std::vector({"z1"})); - EXPECT_EQ(grad_op2->Input("Y"), std::vector({"y2"})); - EXPECT_EQ(grad_op2->Input("Z"), std::vector({"z2"})); - EXPECT_EQ(grad_op2->Input(f::GradVarName("Y")), - std::vector({f::GradVarName("y2")})); - EXPECT_EQ(grad_op2->Input(f::GradVarName("Z")), - std::vector({f::GradVarName("z2")})); - EXPECT_EQ(grad_op2->Output(f::GradVarName("X")), - std::vector({f::GradVarName("y1")})); - EXPECT_EQ(grad_op2->Output(f::GradVarName("H")), std::vector()); - - f::OpDesc *fill_zero_op = block->AllOps()[4]; - ASSERT_EQ(fill_zero_op->Type(), "fill_zeros_like"); - ASSERT_EQ(fill_zero_op->InputNames().size(), 1UL); - ASSERT_EQ(fill_zero_op->OutputNames().size(), 1UL); - EXPECT_EQ(fill_zero_op->Input("X"), std::vector({"z1"})); - EXPECT_EQ(fill_zero_op->Output("Out"), - std::vector({std::string("z1") + f::kZeroVarSuffix})); - - f::OpDesc *grad_op1 = block->AllOps()[5]; - ASSERT_EQ(grad_op1->Type(), "mult_in_out_grad"); - ASSERT_EQ(grad_op1->InputNames().size(), 6UL); - ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); - EXPECT_EQ(grad_op1->Input("X"), std::vector({"x1"})); - EXPECT_EQ(grad_op1->Input("H"), std::vector({"h1"})); - EXPECT_EQ(grad_op1->Input("Y"), std::vector({"y1"})); - EXPECT_EQ(grad_op1->Input("Z"), std::vector({"z1"})); - EXPECT_EQ(grad_op1->Input(f::GradVarName("Y")), - std::vector({f::GradVarName("y1")})); - EXPECT_EQ(grad_op1->Input(f::GradVarName("Z")), - std::vector({std::string("z1") + f::kZeroVarSuffix})); - EXPECT_EQ(grad_op1->Output(f::GradVarName("X")), - std::vector({f::GradVarName("x1")})); - EXPECT_EQ(grad_op1->Output(f::GradVarName("H")), - std::vector({f::GradVarName("h1")})); - - EXPECT_EQ(var_to_grad.size(), 4UL); - EXPECT_EQ(var_to_grad.at("y1"), f::GradVarInfo(f::GradVarName("y1"), 0, 3)); - EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 5)); - EXPECT_EQ(var_to_grad.at("h1"), f::GradVarInfo(f::GradVarName("h1"), 0, 5)); - - EXPECT_TRUE(block->HasVar(f::GradVarName("y1"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("x1"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("h1"))); -} - -TEST(Backward, shared_var) { - f::ProgramDesc program; - f::BlockDesc *block = program.MutableBlock(0); - f::OpDesc *op1 = block->AppendOp(); - op1->SetType("rowwise_add"); - op1->SetInput("X", {"x1"}); - op1->SetInput("b", {"b1"}); - op1->SetOutput("Out", {"out1"}); - - 
f::OpDesc *op2 = block->AppendOp(); - op2->SetType("mul"); - op2->SetInput("X", {"out1"}); - op2->SetInput("Y", {"y2"}); - op2->SetOutput("Out", {"out2"}); - - f::OpDesc *op3 = block->AppendOp(); - op3->SetType("rowwise_add"); - op3->SetInput("X", {"out1"}); - op3->SetInput("b", {"b3"}); - op3->SetOutput("Out", {"out3"}); - - auto target = f::VarDesc("out3"); - target.SetShape({1}); - size_t forward_len = block->AllOps().size(); - auto var_to_grad = - AppendBackward(program, target, std::unordered_set{}); - - ASSERT_EQ(block->AllOps().size(), 8UL); - f::OpDesc *fill_op = block->AllOps()[forward_len]; - EXPECT_EQ(fill_op->Type(), "fill_constant"); - - f::OpDesc *grad_op3 = block->AllOps()[4]; - ASSERT_EQ(grad_op3->Type(), "rowwise_add_grad"); - ASSERT_EQ(grad_op3->InputNames().size(), 1UL); - ASSERT_EQ(grad_op3->OutputNames().size(), 2UL); - EXPECT_EQ(grad_op3->Input(f::GradVarName("Out")), - std::vector({f::GradVarName("out3")})); - EXPECT_EQ(grad_op3->Output(f::GradVarName("X")), - std::vector({f::GradVarName("out1") + "@RENAME@0"})); - EXPECT_EQ(grad_op3->Output(f::GradVarName("b")), - std::vector({f::GradVarName("b3")})); - - f::OpDesc *grad_op4 = block->AllOps()[5]; - ASSERT_EQ(grad_op4->Type(), "mul_grad"); - ASSERT_EQ(grad_op4->InputNames().size(), 4UL); - ASSERT_EQ(grad_op4->OutputNames().size(), 2UL); - EXPECT_EQ(grad_op4->Input("X"), std::vector({"out1"})); - EXPECT_EQ(grad_op4->Input("Y"), std::vector({"y2"})); - EXPECT_EQ(grad_op4->Input("Out"), std::vector({"out2"})); - EXPECT_EQ(grad_op4->Input(f::GradVarName("Out")), - std::vector({f::GradVarName("out2")})); - EXPECT_EQ(grad_op4->Output(f::GradVarName("X")), - std::vector({f::GradVarName("out1") + "@RENAME@1"})); - EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), - std::vector({f::GradVarName("y2")})); - - f::OpDesc *sum_op = block->AllOps()[6]; - ASSERT_EQ(sum_op->Type(), "sum"); - ASSERT_EQ(sum_op->InputNames().size(), 1UL); - ASSERT_EQ(sum_op->OutputNames().size(), 1UL); - EXPECT_EQ(sum_op->Input("X"), - std::vector({f::GradVarName("out1") + "@RENAME@0", - f::GradVarName("out1") + "@RENAME@1"})); - EXPECT_EQ(sum_op->Output("Out"), - std::vector({f::GradVarName("out1")})); - - f::OpDesc *grad_op1 = block->AllOps()[7]; - ASSERT_EQ(grad_op1->Type(), "rowwise_add_grad"); - ASSERT_EQ(grad_op1->InputNames().size(), 1UL); - ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); - EXPECT_EQ(grad_op1->Input(f::GradVarName("Out")), - std::vector({f::GradVarName("out1")})); - EXPECT_EQ(grad_op1->Output(f::GradVarName("X")), - std::vector({f::GradVarName("x1")})); - EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), - std::vector({f::GradVarName("b1")})); - - EXPECT_EQ(var_to_grad.size(), 6UL); - EXPECT_EQ(var_to_grad.at("b3"), f::GradVarInfo(f::GradVarName("b3"), 0, 4)); - EXPECT_EQ(var_to_grad.at("y2"), f::GradVarInfo(f::GradVarName("y2"), 0, 5)); - EXPECT_EQ(var_to_grad.at("out1"), - f::GradVarInfo(f::GradVarName("out1"), 0, 6)); - EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 7)); - EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 7)); - - EXPECT_TRUE(block->HasVar(f::GradVarName("b3"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("y2"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("out1"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("x1"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("b1"))); -} - -TEST(Backward, half_backward) { - f::ProgramDesc program; - f::BlockDesc *block = program.MutableBlock(0); - auto *op1 = block->AppendOp(); - op1->SetType("minus"); - op1->SetInput("X", {"a"}); 
- op1->SetInput("Y", {"b"}); - op1->SetOutput("Out", {"out"}); - - auto target = f::VarDesc("out"); - target.SetShape({1}); - size_t forward_len = block->AllOps().size(); - auto var_to_grad = AppendBackward(program, target, {"b"}); - f::OpDesc *fill_op = block->AllOps()[forward_len]; - EXPECT_EQ(fill_op->Type(), "fill_constant"); - auto ops = block->AllOps(); - ASSERT_EQ(3UL, ops.size()); - - EXPECT_EQ(var_to_grad.size(), 2UL); - EXPECT_EQ(var_to_grad.at("a"), - f::GradVarInfo(f::GradVarName("a"), 0, forward_len + 1)); -} diff --git a/paddle/fluid/framework/prune_test.cc b/paddle/fluid/framework/prune_test.cc index 0e44b34383..8af7d2d510 100644 --- a/paddle/fluid/framework/prune_test.cc +++ b/paddle/fluid/framework/prune_test.cc @@ -14,18 +14,17 @@ limitations under the License. */ #include "paddle/fluid/framework/prune.h" +#include +#include + #include "paddle/fluid/framework/attribute.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/net_op.h" #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/program_desc.h" -#include - namespace f = paddle::framework; -namespace ops = paddle::operators; void AddOp(const std::string &type, const f::VariableNameMap &inputs, const f::VariableNameMap &outputs, f::AttributeMap attrs, diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index 5ff987ad8b..3c8696b508 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -100,7 +100,7 @@ function(op_library TARGET) endif() # Define operators that don't need pybind here. - foreach(manual_pybind_op "net_op" "compare_op" "logical_op" "nccl_op" "tensor_array_read_write_op") + foreach(manual_pybind_op "compare_op" "logical_op" "nccl_op" "tensor_array_read_write_op") if ("${TARGET}" STREQUAL "${manual_pybind_op}") set(pybind_flag 1) endif() @@ -199,7 +199,6 @@ else() set(DEPS_OPS ${DEPS_OPS} send_op prefetch_op recv_op listen_and_serv_op send_vars_op send_barrier_op) endif() -op_library(cond_op DEPS framework_proto tensor net_op) op_library(cross_entropy_op DEPS cross_entropy) op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax) op_library(softmax_op DEPS softmax) @@ -259,7 +258,6 @@ endforeach() set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library") cc_test(gather_test SRCS gather_test.cc DEPS tensor) -cc_test(net_op_test SRCS net_op_test.cc DEPS net_op) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) cc_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc DEPS lod_tensor) cc_test(beam_search_op_test SRCS beam_search_op_test.cc DEPS lod_tensor beam_search_op) diff --git a/paddle/fluid/operators/cond_op.cc b/paddle/fluid/operators/cond_op.cc deleted file mode 100644 index 15dce9e3e2..0000000000 --- a/paddle/fluid/operators/cond_op.cc +++ /dev/null @@ -1,235 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/cond_op.h" -#include "paddle/fluid/operators/gather.h" -#include "paddle/fluid/operators/scatter.h" -#include "paddle/fluid/platform/device_context.h" - -namespace paddle { -namespace operators { - -using Scope = framework::Scope; -using Variable = framework::Variable; -using Tensor = framework::Tensor; -using LoDTensor = framework::LoDTensor; -using DDim = framework::DDim; - -framework::Scope& CondOp::AddSubScope(const Scope& scope) const { - auto sub_scopes_var = scope.FindVar("SubScopes"); - PADDLE_ENFORCE_NOT_NULL(sub_scopes_var, - "Output(SubScopes) of CondOp should not be null."); - auto sub_scopes = sub_scopes_var->GetMutable>(); - auto& sub_scope = scope.NewScope(); - sub_scopes->push_back(&sub_scope); - return sub_scope; -} - -std::vector& CondOp::GetSubScopes( - const framework::Scope& scope) const { - auto sub_scopes_var = scope.FindVar("SubScopes"); - PADDLE_ENFORCE_NOT_NULL(sub_scopes_var, - "Output(SubScopes) of CondOp should not be null."); - return *sub_scopes_var->GetMutable>(); -} - -LoDTensor& CondOp::AddIndexTensor(const Scope& scope) const { - auto index_tensors_var = scope.FindVar("IndexTensors"); - PADDLE_ENFORCE_NOT_NULL(index_tensors_var, - "Output(IndexTensors) of CondOp should not be null."); - auto& index_tensors = - *index_tensors_var->GetMutable>(); - index_tensors.push_back(LoDTensor()); - return index_tensors.back(); -} - -std::vector& CondOp::GetIndexTensors( - const framework::Scope& scope) const { - auto* index_tensors_var = scope.FindVar("IndexTensors"); - PADDLE_ENFORCE_NOT_NULL(index_tensors_var, - "Output(IndexTensors) of CondOp should not be null."); - return *index_tensors_var->GetMutable>(); -} - -void CondOp::PrepareDataForSubnet( - const framework::Scope& scope, - const platform::DeviceContext& dev_ctx) const { - PADDLE_ENFORCE(!Inputs("Xs").empty(), "Inputs(Xs) of CondOp can't be empty."); - - for (int i = 0; i < BRANCH_NUM; ++i) { - // Create two sub scopes for true and false branches - // sub_scopes[0] for the true branch - // sub_scopes[1] for the false branch - AddSubScope(scope); - // Create two tensors for true and false indices: - // index_tensors[0] for the true branch - // index_tensors[1] for the false branch - AddIndexTensor(scope); - } - - Variable* cond_var = scope.FindVar(Input("Cond")); - PADDLE_ENFORCE_NOT_NULL(cond_var, - "Input(Cond) of CondOp should not be null."); - const LoDTensor* cond = cond_var->GetMutable(); - - // get the true/false index at runtime according to cond tensor - // index_vectors[0]: vector, contains all index for cond[i] == true - // index_vectors[1]: vector, contains all index for cond[i] == false - std::vector> index_vectors; - index_vectors.resize(BRANCH_NUM); - - const int* cond_data = cond->data(); - for (int i = 0; i < cond->dims()[0]; ++i) { - if (cond_data[i]) - index_vectors[TRUE_BRANCH].push_back(i); - else - index_vectors[FALSE_BRANCH].push_back(i); - } - - // put index_vectors[0] and index_vectors[1] into two tensors: - // index_tensors[0] and index_tensors[1] - std::vector& index_tensors = GetIndexTensors(scope); - std::vector& sub_scopes = GetSubScopes(scope); - - for (int i = 0; i < BRANCH_NUM; ++i) { - DDim dim = {static_cast(index_vectors[i].size())}; - int* index_tensor_data_ptr = - index_tensors[i].mutable_data(dim, platform::CPUPlace()); - memcpy(index_tensor_data_ptr, index_vectors[i].data(), - dim[0] * sizeof(int)); - } - - // create input in subscopes according to index_vectors - for (auto& input : Inputs("Xs")) { - Variable* var_parent = 
scope.FindVar(input); - PADDLE_ENFORCE_NOT_NULL(var_parent); - const auto* tensor_parent = &var_parent->Get(); - - for (int i = 0; i < BRANCH_NUM; ++i) { - Variable* var_child = sub_scopes[i]->FindVar(input); - PADDLE_ENFORCE_NOT_NULL(var_child); - auto* tensor_child = var_child->GetMutable(); - - // Resize child - DDim dim = tensor_parent->dims(); - dim[0] = index_tensors[i].dims()[0]; - tensor_child->mutable_data(dim, platform::CPUPlace()); - - CPUGather(dev_ctx, *tensor_parent, index_tensors[i], tensor_child); - } - } - - // create output_tensors in subscope for sub_net - for (int i = 0; i < BRANCH_NUM; ++i) { - for (auto& output : (*sub_net_op_[i]).Outputs()) { - for (auto& var_name : output.second) { - sub_scopes[i]->Var(var_name); - } - } - } -} - -void CondOp::MergeDataFromSubnet(const framework::Scope& scope, - const platform::DeviceContext& dev_ctx) const { - std::vector& sub_scopes = GetSubScopes(scope); - const std::vector& index_tensors = - GetIndexTensors(scope); - - // Infer the output dim, out_dim[0] = true_dim[0] + false_dim[0] - PADDLE_ENFORCE(!Outputs("Outs").empty(), - "Outputs(Outs) of CondOp can't be empty."); - for (auto& output : Outputs("Outs")) { - const LoDTensor* tensor_t_out = - &sub_scopes[TRUE_BRANCH]->FindVar(output)->Get(); - PADDLE_ENFORCE_NOT_NULL(tensor_t_out, "True output should not be NULL"); - const LoDTensor* tensor_f_out = - &sub_scopes[FALSE_BRANCH]->FindVar(output)->Get(); - PADDLE_ENFORCE_NOT_NULL(tensor_f_out, "False output should not be NULL"); - - auto* var_out = scope.FindVar(output); - PADDLE_ENFORCE_NOT_NULL(var_out, "Output not found"); - LoDTensor* tensor_out = var_out->GetMutable(); - PADDLE_ENFORCE_NOT_NULL(tensor_t_out, - "True output tensor should not be NULL"); - - DDim true_dim = tensor_t_out->dims(); - DDim false_dim = tensor_f_out->dims(); - true_dim[0] = 0; - false_dim[0] = 0; - PADDLE_ENFORCE_EQ(true_dim, false_dim, - "Outputs not of the same shape except the first dim"); - - DDim out_dim = tensor_t_out->dims(); - out_dim[0] = tensor_t_out->dims()[0] + tensor_f_out->dims()[0]; - tensor_out->Resize(out_dim); - tensor_out->mutable_data(platform::CPUPlace()); - } - - // merge output results: - // output_tensor = true_output_tensor + false_output_tensor - for (auto& output : Outputs("Outs")) { - Variable* var_parent = scope.FindVar(output); - PADDLE_ENFORCE_NOT_NULL(var_parent); - auto* tensor_parent = var_parent->GetMutable(); - - for (int i = 0; i < BRANCH_NUM; ++i) { - Variable* var_child = sub_scopes[i]->FindVar(output); - PADDLE_ENFORCE_NOT_NULL(var_child); - auto* tensor_child = &var_child->Get(); - ScatterAssign(dev_ctx, *tensor_child, index_tensors[i], - tensor_parent); - } - } -} - -void CondOp::RunImpl(const Scope& scope, const platform::Place& place) const { - // get device context from pool - platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); - auto& dev_ctx = *pool.Get(place); - - PrepareDataForSubnet(scope, dev_ctx); - std::vector& sub_scopes = GetSubScopes(scope); - for (int i = 0; i < BRANCH_NUM; ++i) { - sub_net_op_[i]->Run(*sub_scopes[i], place); - } - MergeDataFromSubnet(scope, dev_ctx); -} - -class CondOpProtoAndCheckerMaker : public framework::OpProtoAndCheckerMaker { - public: - CondOpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("Cond", "The condition, which is a bool vector"); - AddInput("Xs", "Inputs of Subnets").AsDuplicable(); - AddOutput("Outs", "Outputs of Cond_Op after merge").AsDuplicable(); - - 
AddOutput("SubScopes", "sub scopes for true and false branches"); - AddOutput("IndexTensors", "Index Tensors contains indices for true/false"); - - AddComment(R"DOC( -Sample Dependent Conditional Operator. - -Given Cond[i] as a 1/0 vector to indicate true/false: -Out[i] = subnet_true[i], if Cond[i] == true -Out[i] = subnet_false[i], if Cond[i] == false - -)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OP_WITHOUT_GRADIENT(cond, paddle::operators::CondOp, - paddle::operators::CondOpProtoAndCheckerMaker); diff --git a/paddle/fluid/operators/cond_op.h b/paddle/fluid/operators/cond_op.h deleted file mode 100644 index d3888923db..0000000000 --- a/paddle/fluid/operators/cond_op.h +++ /dev/null @@ -1,96 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include -#include -#include "glog/logging.h" -#include "paddle/fluid/framework/ddim.h" -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/operators/net_op.h" - -namespace paddle { -namespace operators { - -/* - * @brief CondOp is a dynamic if-else Operator - * - * It has a input tensor named cond indicating which netop each instance will - * run. - * - * if cond == 1, it will run true_net, which is a NetOp. - * - * if cond == 0, it will run false_net, which is another NetOp. - */ -class CondOp : public framework::OperatorBase { - public: - CondOp(const std::string& type, const framework::VariableNameMap& inputs, - const framework::VariableNameMap& outputs, - const framework::AttributeMap& attrs) - : OperatorBase(type, inputs, outputs, attrs) { - sub_net_op_.resize(BRANCH_NUM); - } - - CondOp(const CondOp& o) - : framework::OperatorBase( - static_cast(o)) { - // TODO(yuyang18): Implement copy ctor well. 
- PADDLE_THROW("Not implemented"); - } - - framework::Scope& AddSubScope(const framework::Scope& scope) const; - std::vector& GetSubScopes( - const framework::Scope& scope) const; - - framework::LoDTensor& AddIndexTensor(const framework::Scope& scope) const; - std::vector& GetIndexTensors( - const framework::Scope& scope) const; - - void PrepareDataForSubnet(const framework::Scope& scope, - const platform::DeviceContext& dev_ctx) const; - void MergeDataFromSubnet(const framework::Scope& scope, - const platform::DeviceContext& dev_ctx) const; - - /* - * Set True Block - */ - void set_truenet(std::unique_ptr&& net) { - sub_net_op_[TRUE_BRANCH] = std::move(net); - } - - /* - * Set False Block - */ - void set_falsenet(std::unique_ptr&& net) { - sub_net_op_[FALSE_BRANCH] = std::move(net); - } - - private: - void RunImpl(const framework::Scope& scope, - const platform::Place& place) const override; - - private: - const int TRUE_BRANCH = 0; - const int FALSE_BRANCH = 1; - const int BRANCH_NUM = 2; - - // sub_net_op_[0]: subnet_t - // sub_net_op_[1]: subnet_f - std::vector> sub_net_op_; -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/minus_op.cc b/paddle/fluid/operators/minus_op.cc index 5790deb54d..a302b24560 100644 --- a/paddle/fluid/operators/minus_op.cc +++ b/paddle/fluid/operators/minus_op.cc @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/minus_op.h" + #include #include -#include "paddle/fluid/operators/net_op.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/net_op.cc b/paddle/fluid/operators/net_op.cc deleted file mode 100644 index 0c2da74417..0000000000 --- a/paddle/fluid/operators/net_op.cc +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/operators/net_op.h" -#include -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -const char NetOp::kAll[] = "all"; - -void NetOp::CompleteAddOp(bool calc) { - add_op_done_ = true; - if (!calc) return; - std::set input_set; - std::set output_set; - for (auto& op : ops_) { - for (auto& ipt : op->Inputs()) { - for (auto& var_name : ipt.second) { - // If input variable has been in output set, then it will be - // added into intermediate_outputs_. Otherwise, it will be - // added into input set. 
- if (Contains(output_set, var_name)) { - intermediate_outputs_.insert(var_name); - } else { - input_set.insert(var_name); - } - } - } - - for (auto& opt : op->Outputs()) { - for (auto& var_name : opt.second) { - output_set.insert(var_name); - } - } - } - auto& inputs = inputs_[kAll]; - inputs.reserve(input_set.size()); - std::copy(input_set.begin(), input_set.end(), std::back_inserter(inputs)); - auto& outputs = outputs_[kAll]; - outputs.reserve(output_set.size()); - std::copy(output_set.begin(), output_set.end(), std::back_inserter(outputs)); -} - -std::string NetOp::DebugStringEx(const framework::Scope* scope) const { - std::ostringstream os; - os << OperatorBase::DebugStringEx(scope) << std::endl; - for (auto& op : ops_) { - std::istringstream is(op->DebugStringEx(scope)); - for (std::string line; std::getline(is, line);) { - os << " " << line << std::endl; - } - } - return os.str(); -} - -bool NetOp::IsNetOp() const { return true; } - -std::vector NetOp::OutputVars(bool has_intermediate) const { - std::vector all; - for (auto& pair : this->outputs_) { - for (auto& var_name : pair.second) { - all.push_back(var_name); - } - } - if (has_intermediate) { - return all; - } - std::vector ret_val; - for (auto& each : all) { - if (!Contains(intermediate_outputs_, each)) { - ret_val.push_back(each); - } - } - return ret_val; -} - -NetOp::NetOp(const std::string& type, const framework::VariableNameMap& inputs, - const framework::VariableNameMap& outputs, - const framework::AttributeMap& attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - -std::unique_ptr NetOp::Clone() const { - PADDLE_ENFORCE( - add_op_done_, - "Must clone a sealed NetOp, invoke Net::CompleteAddOp before clone"); - return std::unique_ptr(new NetOp(*this)); -} - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/net_op.h b/paddle/fluid/operators/net_op.h deleted file mode 100644 index cbf8820cf4..0000000000 --- a/paddle/fluid/operators/net_op.h +++ /dev/null @@ -1,130 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include "paddle/fluid/framework/framework.pb.h" -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -/** - * @brief Network is also a type of Operator - * - * It will manage the operators it has. - * - * Network is the container and controller of a set of operators. - - * A network object knows all Operators belonging to this network. Variables, - * which are inputs and outputs of these operators, are created and managed by a - * hierarchy of Scope objects. - * - * This is the base class of network, all the networks should implement the APIs - * it defines. 
- */ -class NetOp : public framework::OperatorBase { - public: - static const char kAll[]; - NetOp() - : framework::OperatorBase("plain_net", framework::VariableNameMap{}, - framework::VariableNameMap{}, - framework::AttributeMap{}) {} - - NetOp(const std::string& type, const framework::VariableNameMap& inputs, - const framework::VariableNameMap& outputs, - const framework::AttributeMap& attrs); - - NetOp(const NetOp& o) : framework::OperatorBase(o.type_, {}, {}, o.attrs_) { - this->ops_.reserve(o.ops_.size()); - std::transform( - o.ops_.begin(), o.ops_.end(), std::back_inserter(this->ops_), - [](const std::unique_ptr& op) { - return std::unique_ptr(op->Clone()); - }); - this->CompleteAddOp(); - } - - bool SupportGPU() const override { - for (auto& op : ops_) { - if (!op->SupportGPU()) { - return false; - } - } - return true; - } - - void AppendOp(const framework::OperatorBase& op) { AppendOp(op.Clone()); } - - /** - * @brief Add an operator by ptr - */ - void AppendOp(std::unique_ptr op) { - PADDLE_ENFORCE(!add_op_done_, - "Cannot AppendOp when this network is sealed"); - PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op"); - ops_.push_back(std::move(op)); - } - - void InsertOp(size_t pos, std::unique_ptr op) { - PADDLE_ENFORCE(!add_op_done_, - "Cannot InsertOp when this network is sealed"); - PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op"); - PADDLE_ENFORCE_LE(pos, ops_.size(), "Out of range"); - ops_.insert(ops_.begin() + pos, std::move(op)); - } - - void InsertOp(size_t pos, const framework::OperatorBase& op) { - InsertOp(pos, op.Clone()); - } - - void CompleteAddOp(bool calculate = true); - - std::string DebugStringEx( - const framework::Scope* scope = nullptr) const override; - - bool IsNetOp() const override; - std::vector OutputVars(bool has_intermediate) const override; - - std::unique_ptr Clone() const override; - - std::vector> ops_; - - private: - /** - * @brief Run the network. - * - * Run all the operators with the `scope`, if no scope is provided, default - * scope will be used instead. If no OpContext is provicded, default context - * will be used. - */ - void RunImpl(const framework::Scope& scope, - const platform::Place& place) const override { - for (auto& op : ops_) { - op->Run(scope, place); - } - } - - bool add_op_done_{false}; - std::set intermediate_outputs_; - - template - static bool Contains(T container, KeyType key) { - return container.find(key) != container.end(); - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/net_op_test.cc b/paddle/fluid/operators/net_op_test.cc deleted file mode 100644 index 3b5f575485..0000000000 --- a/paddle/fluid/operators/net_op_test.cc +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-#include "paddle/fluid/operators/net_op.h" - -#include - -namespace paddle { -namespace operators { -using Scope = framework::Scope; -using DeviceContext = platform::DeviceContext; - -static int run_cnt = 0; - -class TestOp : public framework::OperatorBase { - public: - using framework::OperatorBase::OperatorBase; - DEFINE_OP_CLONE_METHOD(TestOp); - - private: - void RunImpl(const Scope& scope, - const platform::Place& place) const override { - ++run_cnt; - } -}; - -template -void AssertSameVectorWithoutOrder(const std::vector& expected, - const std::vector& actual) { - ASSERT_EQ(expected.size(), actual.size()); - std::unordered_set expected_set; - for (auto& tmp : expected) { - expected_set.insert(tmp); - } - for (auto& act : actual) { - ASSERT_NE(expected_set.end(), expected_set.find(act)); - } -} - -TEST(OpKernel, all) { - auto net = std::make_shared(); - ASSERT_NE(net, nullptr); - - net->AppendOp(std::unique_ptr( - new TestOp("test", {{"X", {"x"}}, {"W", {"w1"}}, {"b", {"b1"}}}, - {{"Out", {"y"}}}, framework::AttributeMap{}))); - net->AppendOp(std::unique_ptr( - new TestOp("test", {{"X", {"y"}}, {"W", {"w2"}}, {"b", {"b2"}}}, - {{"Out", {"z"}}}, framework::AttributeMap{}))); - - net->CompleteAddOp(); - AssertSameVectorWithoutOrder({"x", "w1", "b1", "w2", "b2"}, - net->Inputs(NetOp::kAll)); - AssertSameVectorWithoutOrder({"y", "z"}, net->Outputs(NetOp::kAll)); - - auto final_outs = net->OutputVars(false); - - ASSERT_EQ(final_outs.size(), 1UL); - ASSERT_EQ(final_outs[0], "z"); -} - -TEST(NetOp, insert_op) { - NetOp net; - auto op1 = std::unique_ptr( - new framework::NOP("empty", {{"X", {"x"}}, {"W", {"w1"}}, {"b", {"b1"}}}, - {{"Out", {"y"}}}, framework::AttributeMap{})); - net.AppendOp(*op1); - net.InsertOp(0, *op1); - ASSERT_EQ(2UL, net.ops_.size()); - net.InsertOp(2, std::move(op1)); - ASSERT_EQ(3UL, net.ops_.size()); -} - -TEST(NetOp, Clone) { - NetOp net; - net.AppendOp(std::unique_ptr(new framework::NOP{ - "empty", framework::VariableNameMap{}, framework::VariableNameMap{}, - framework::AttributeMap{}})); - net.AppendOp(std::unique_ptr(new framework::NOP{ - "empty2", framework::VariableNameMap{}, framework::VariableNameMap{}, - framework::AttributeMap{}})); - net.CompleteAddOp(true); - auto new_net_op = net.Clone(); - ASSERT_NE(new_net_op, nullptr); - ASSERT_TRUE(new_net_op->IsNetOp()); - auto* new_net = static_cast(new_net_op.get()); - ASSERT_EQ(2UL, new_net->ops_.size()); - ASSERT_EQ(new_net->ops_[0]->Type(), "empty"); - ASSERT_EQ(new_net->ops_[1]->Type(), "empty2"); -} - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/prelu_op.cc b/paddle/fluid/operators/prelu_op.cc index 447b854544..7fb45bd19d 100644 --- a/paddle/fluid/operators/prelu_op.cc +++ b/paddle/fluid/operators/prelu_op.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/prelu_op.h" -#include "paddle/fluid/operators/net_op.h" + +#include namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/scale_op.cc b/paddle/fluid/operators/scale_op.cc index b16d06df8d..7ca7639fdb 100644 --- a/paddle/fluid/operators/scale_op.cc +++ b/paddle/fluid/operators/scale_op.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/scale_op.h" -#include "paddle/fluid/operators/net_op.h" + +#include namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/split_op.cc b/paddle/fluid/operators/split_op.cc index dffac772f1..e745509ec8 100644 --- a/paddle/fluid/operators/split_op.cc +++ b/paddle/fluid/operators/split_op.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/split_op.h" -#include "paddle/fluid/operators/net_op.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index d559743a69..e571da42cb 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -20,8 +20,6 @@ limitations under the License. */ #include #include -#include "paddle/fluid/pybind/protobuf.h" - #include "paddle/fluid/framework/backward.h" #include "paddle/fluid/framework/channel.h" #include "paddle/fluid/framework/executor.h" @@ -31,18 +29,18 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_rank_table.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor_array.h" +#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/parallel_executor.h" #include "paddle/fluid/framework/prune.h" #include "paddle/fluid/framework/reader.h" #include "paddle/fluid/framework/selected_rows.h" -#include "paddle/fluid/operators/cond_op.h" -#include "paddle/fluid/operators/net_op.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/pybind/const_value.h" #include "paddle/fluid/pybind/exception.h" -#include "paddle/fluid/pybind/pybind.h" +#include "paddle/fluid/pybind/protobuf.h" +#include "paddle/fluid/pybind/pybind.h" // NOLINT #include "paddle/fluid/pybind/recordio.h" #include "paddle/fluid/pybind/tensor_py.h" @@ -239,11 +237,6 @@ All parameter, weight, gradient are variables in Paddle. }, py::return_value_policy::reference) #endif - .def("get_net", - [](Variable &self) -> operators::NetOp * { - return self.GetMutable(); - }, - py::return_value_policy::reference) .def("get_reader", [](Variable &self) -> framework::ReaderHolder * { PADDLE_ENFORCE(self.IsType()); @@ -420,42 +413,6 @@ All parameter, weight, gradient are variables in Paddle. 
[](const OperatorBase &op) { return op.OutputVars(false); }) .def("support_gpu", &OperatorBase::SupportGPU); - py::class_(m, "Net") - .def_static("create", - []() -> operators::NetOp * { - auto *retv = new operators::NetOp; - retv->SetType("plain_net"); - return retv; - }) - .def("append_op", [](operators::NetOp &self, - const OperatorBase &op) { self.AppendOp(op); }) - .def("complete_add_op", &operators::NetOp::CompleteAddOp) - .def("complete_add_op", [](std::shared_ptr &self) { - self->CompleteAddOp(); - }); - - // cond_op - py::class_(m, "CondOp") - .def_static("create", - [](py::bytes protobin) -> operators::CondOp * { - proto::OpDesc desc; - PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), - "Cannot parse user input to OpDesc"); - PADDLE_ENFORCE(desc.IsInitialized(), - "User OpDesc is not initialized, reason %s", - desc.InitializationErrorString()); - auto cond_op = OpRegistry::CreateOp(desc); - return static_cast(cond_op.release()); - }) - .def("set_truenet", - [](operators::CondOp &self, const operators::NetOp &net) -> void { - self.set_truenet(net.Clone()); - }) - .def("set_falsenet", - [](operators::CondOp &self, const operators::NetOp &net) -> void { - self.set_falsenet(net.Clone()); - }); - py::class_(m, "Executor") .def(py::init()) .def("run", diff --git a/paddle/fluid/recordio/chunk.cc b/paddle/fluid/recordio/chunk.cc index e7ebbba452..82d9aa601c 100644 --- a/paddle/fluid/recordio/chunk.cc +++ b/paddle/fluid/recordio/chunk.cc @@ -14,13 +14,13 @@ #include "paddle/fluid/recordio/chunk.h" +#include #include #include #include #include "paddle/fluid/platform/enforce.h" -#include "snappy_stream/include/snappystream.hpp" -#include "zlib/include/zlib.h" +#include "snappystream.hpp" namespace paddle { namespace recordio { diff --git a/paddle/fluid/recordio/header.cc b/paddle/fluid/recordio/header.cc index ed09d58f6a..c4822329a4 100644 --- a/paddle/fluid/recordio/header.cc +++ b/paddle/fluid/recordio/header.cc @@ -13,6 +13,9 @@ // limitations under the License. 
#include "paddle/fluid/recordio/header.h" + +#include + #include "paddle/fluid/platform/enforce.h" namespace paddle { diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py index 10aa63e18a..7ecf9a1459 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py @@ -14,23 +14,13 @@ import unittest import numpy as np -from op_test import OpTest import paddle.fluid.core as core from paddle.fluid.op import Operator +import paddle.fluid as fluid +from op_test import OpTest from paddle.fluid.framework import grad_var_name -def get_backward_op(scope, op, no_grad_set): - backward_op = core.Operator.backward(op, no_grad_set) - for input in backward_op.input_vars(): - var = scope.var(input) - var.get_tensor() - for output in backward_op.output_vars(): - var = scope.var(output) - var.get_tensor() - return backward_op - - def _reference_testing(x, scale, offset, mean, var, epsilon, data_format): x_shape = x.shape if len(x_shape) == 2: @@ -64,11 +54,6 @@ def _reference_testing(x, scale, offset, mean, var, epsilon, data_format): def _reference_training(x, scale, offset, epsilon, data_format): x_shape = x.shape - if len(x_shape) == 2: - if data_format == "NCHW": - x = np.reshape(x, (x.shape[0], x.shape[1], 1, 1)) - else: - x = np.reshape(x, (x.shape[0], 1, 1, x.shape[1])) if data_format == "NCHW": n, c, h, w = x.shape @@ -88,8 +73,6 @@ def _reference_training(x, scale, offset, epsilon, data_format): offset_tile = np.reshape(offset, (1, c, 1, 1)) offset_tile = np.reshape(offset_tile, (1, c, 1, 1)) y = normalized * scale_tile + offset_tile - if len(x_shape) == 2: - y = np.reshape(y, (y.shape[0], y.shape[1])) return y, mean, var elif data_format == "NHWC": x_square = x * x @@ -100,59 +83,42 @@ def _reference_training(x, scale, offset, epsilon, data_format): var = x_square_sum / element_count - mean * mean normalized = (x - mean) / np.sqrt(var + epsilon) y = normalized * scale + offset - if len(x_shape) == 2: - y = np.reshape(y, x_shape) return y, mean, var else: raise ValueError("Unknown data order.") -def _reference_grad(x, grad_y, scale, mean, var, epsilon, data_format): +def _reference_grad(x, y_grad, scale, mean, var, epsilon, data_format): # Use the following formulas to calculate gradients: # grad_scale = # sum(grad_y * (x - mean)) * rsqrt(var + epsilon) # # grad_offset = sum(output_y) # - # grad_x = + # x_grad = # 1/N * scale * rsqrt(var + epsilon) * (N * grad_y - sum(grad_y) - # (x - mean) * sum(grad_y * (x - mean)) / (var + epsilon)) # transfer from (N, C, H, W) to (N, H, W, C) to simplify computation - x_shape = x.shape - - if len(x_shape) == 2: - if data_format == "NCHW": - x = np.reshape(x, (x.shape[0], x.shape[1], 1, 1)) - grad_y = np.reshape(grad_y, - (grad_y.shape[0], grad_y.shape[1], 1, 1)) - else: - x = np.reshape(x, (x.shape[0], 1, 1, x.shape[1])) - grad_y = np.reshape(grad_y, - (grad_y.shape[0], 1, 1, grad_y.shape[1])) - if data_format == "NCHW": x = np.transpose(x, (0, 2, 3, 1)) - grad_y = np.transpose(grad_y, (0, 2, 3, 1)) + y_grad = np.transpose(y_grad, (0, 2, 3, 1)) - # raise ValueError("data_format must be NHWC, got %s." 
% data_format) - grad_x = scale * (grad_y - np.mean( - grad_y, axis=(0, 1, 2)) - (x - mean) * np.mean( - grad_y * (x - mean), axis=(0, 1, 2)) / + x_grad = scale * (y_grad - np.mean( + y_grad, axis=(0, 1, 2)) - (x - mean) * np.mean( + y_grad * (x - mean), axis=(0, 1, 2)) / (var + epsilon)) / np.sqrt(var + epsilon) - grad_scale = np.sum(grad_y * (x - mean) / np.sqrt(var + epsilon), + grad_scale = np.sum(y_grad * (x - mean) / np.sqrt(var + epsilon), axis=(0, 1, 2)) - grad_offset = np.sum(grad_y, axis=(0, 1, 2)) + grad_offset = np.sum(y_grad, axis=(0, 1, 2)) # transfer back to N, C, H, W if data_format == "NCHW": - grad_x = np.transpose(grad_x, (0, 3, 1, 2)) + x_grad = np.transpose(x_grad, (0, 3, 1, 2)) x = np.transpose(x, (0, 3, 1, 2)) - grad_y = np.transpose(grad_y, (0, 3, 1, 2)) + y_grad = np.transpose(y_grad, (0, 3, 1, 2)) - if len(x_shape) == 2: - grad_x = np.reshape(grad_x, x_shape) - return grad_x, grad_scale, grad_offset + return x_grad, grad_scale, grad_offset def create_or_get_tensor(scope, var_name, var, place): @@ -186,7 +152,7 @@ def set_output_grad(scope, outputs, place, feed_dict=None): __set_tensor__(output, data) -class TestBatchNormOpInference(OpTest): +class TestBatchNormOpInference(unittest.TestCase): def setUp(self): self.dtype = np.float32 @@ -304,231 +270,121 @@ class TestFP16BatchNormOpInference(TestBatchNormOpInference): self.check_with_place(place, data_format, self.dtype, [2, 3]) -class TestBatchNormOpTraining(OpTest): +class TestBatchNormOpTraining(unittest.TestCase): def __assert_close(self, tensor, np_array, msg, atol=1e-4): + if not np.allclose(np.array(tensor), np_array, atol=atol): + import pdb + pdb.set_trace() self.assertTrue(np.allclose(np.array(tensor), np_array, atol=atol), msg) - def test_python_testing(self): - data_format = "NHWC" - epsilon = 0.00001 - - n, h, w, c = 2, 3, 4, 5 - x_shape = [n, h, w, c] - scale_shape = [c] - - x_val = np.random.random_sample(x_shape).astype(np.float32) - scale_val = np.random.random_sample(scale_shape).astype(np.float32) - bias_val = np.random.random_sample(scale_shape).astype(np.float32) - - mean = np.zeros(scale_shape).astype(np.float32) - variance = np.ones(scale_shape).astype(np.float32) - - y_out = _reference_testing(x_val, scale_val, bias_val, mean, variance, - epsilon, "NHWC") - - # running N, C, H, W case - # should produce the same results - x_shape2 = [n, c, h, w] - x_val2 = np.transpose(x_val, (0, 3, 1, 2)) - y_out2 = _reference_testing(x_val2, scale_val, bias_val, mean, variance, - epsilon, "NCHW") - - # transfer (N, C, H, W) back to (N, H, W, C) - y_out2_trans = np.transpose(y_out2, (0, 2, 3, 1)) - self.__assert_close(y_out, y_out2_trans, "inference output") - print 'python: NHWC, NCHW, inference checking passed' - - def test_python_training(self): - data_format = "NHWC" - epsilon = 0.00001 - momentum = 0.9 - - # N, H, W, C: 2, 3, 4, 2 - n, h, w, c = 2, 3, 4, 5 - x_shape = [n, h, w, c] - scale_shape = [c] - - x_val = np.random.random_sample(x_shape).astype(np.float32) - scale_val = np.random.random_sample(scale_shape).astype(np.float32) - bias_val = np.random.random_sample(scale_shape).astype(np.float32) - - mean = np.zeros(scale_shape).astype(np.float32) - variance = np.ones(scale_shape).astype(np.float32) - - # run forward - y_out, saved_mean, var_ref = _reference_training( - x_val, scale_val, bias_val, epsilon, "NHWC") - - # - mean_out = saved_mean * (1. - momentum) + momentum * mean - variance_out = var_ref * (1. - momentum) + momentum * variance - saved_variance = 1. 
/ np.sqrt(var_ref + epsilon) - - # running N, C, H, W case - # should produce the same results - x_shape2 = [n, c, h, w] - x_val2 = np.transpose(x_val, (0, 3, 1, 2)) - y_out2, saved_mean2, var_ref2 = _reference_training( - x_val2, scale_val, bias_val, epsilon, "NCHW") - - self.__assert_close(saved_mean, saved_mean2, "batch mean") - self.__assert_close(var_ref, var_ref2, "batch variance") - - # transfer (N, C, H, W) back to (N, H, W, C) - y_out2_trans = np.transpose(y_out2, (0, 2, 3, 1)) - self.__assert_close(y_out, y_out2_trans, "batch output") - print 'python: NHWC, NCHW, forward checking passed' - - # test backward now - # NHWC - self.y_grad = np.random.random_sample(x_shape).astype(np.float32) - y_grad = self.y_grad - # y_grad = np.ones(x_shape).astype(np.float32) - x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_grad( - x_val, y_grad, scale_val, saved_mean, var_ref, epsilon, "NHWC") - - # NCHW - y_grad2 = np.transpose(y_grad, (0, 3, 1, 2)) - # y_grad2 = np.ones(x_shape2).astype(np.float32) - x_grad_ref2, scale_grad_ref2, bias_grad_ref2 = _reference_grad( - x_val2, y_grad2, scale_val, saved_mean2, var_ref2, epsilon, "NCHW") - - self.__assert_close(scale_grad_ref, scale_grad_ref2, "scale gradient") - self.__assert_close(bias_grad_ref, bias_grad_ref2, "bias gradient") - - x_grad_transpose = np.transpose(x_grad_ref2, (0, 2, 3, 1)) - self.__assert_close(x_grad_ref, x_grad_transpose, "x gradient") - print 'python: NHWC, NCHW, backward checking passed' - def test_forward_backward(self): def test_with_place(place, data_layout, shape): # attr epsilon = 0.00001 momentum = 0.9 - - if len(shape) == 2: - x_shape = shape - c = shape[1] + if data_layout == "NCHW": + n, c, h, w = shape[0], shape[1], shape[2], shape[3] else: - # n, h, w, c = 2, 3, 4, 2 n, h, w, c = shape[0], shape[1], shape[2], shape[3] - if data_format == "NHWC": - x_shape = [n, h, w, c] - elif data_format == "NCHW": - x_shape = [n, c, h, w] - else: - raise ValueError("Unknown data type.") scale_shape = [c] - x_val = np.random.random_sample(x_shape).astype(np.float32) - scale_val = np.random.random_sample(scale_shape).astype(np.float32) - bias_val = np.random.random_sample(scale_shape).astype(np.float32) - + np.random.seed(123) + x = np.random.random_sample(shape).astype(np.float32) + scale = np.random.random_sample(scale_shape).astype(np.float32) + bias = np.random.random_sample(scale_shape).astype(np.float32) mean = np.zeros(scale_shape).astype(np.float32) variance = np.ones(scale_shape).astype(np.float32) # run forward - y_out, saved_mean, var_ref = _reference_training( - x_val, scale_val, bias_val, epsilon, data_format) - - # update moving mean and variance + y, saved_mean, var_ref = _reference_training(x, scale, bias, + epsilon, data_layout) mean_out = saved_mean * (1. - momentum) + momentum * mean variance_out = var_ref * (1. - momentum) + momentum * variance saved_variance = 1. / np.sqrt(var_ref + epsilon) - - # for gradient test - # y_grad = np.ones(x_shape).astype(np.float32) - y_grad = np.zeros(x_shape).astype(np.float32) - if len(y_grad.shape) == 2: - y_grad[0, 0] = 1. - else: - y_grad[0, 0, 0, 0] = 1. 
- # y_grad = np.random.random_sample(x_shape).astype(np.float32) - x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_grad( - x_val, y_grad, scale_val, saved_mean, var_ref, epsilon, - data_format) - - scope = core.Scope() - - # create input - x_tensor = create_or_get_tensor(scope, "x_val", x_val, place) - scale_tensor = create_or_get_tensor(scope, "scale_val", scale_val, - place) - bias_tensor = create_or_get_tensor(scope, "bias_val", bias_val, - place) - mean_tensor = create_or_get_tensor(scope, "mean", mean, place) - variance_tensor = create_or_get_tensor(scope, "variance", variance, - place) - - # create output - y_tensor = create_or_get_tensor(scope, "y_out", None, place) - saved_mean_tensor = create_or_get_tensor(scope, "saved_mean", None, - place) - saved_variance_tensor = create_or_get_tensor( - scope, "saved_variance", None, place) - mean_out_tensor = mean_tensor - variance_out_tensor = variance_tensor - - batch_norm_op = Operator( - "batch_norm", - # inputs - X="x_val", - Scale="scale_val", - Bias="bias_val", - Mean="mean", - Variance="variance", - # outputs - Y="y_out", - MeanOut="mean", - VarianceOut="variance", - SavedMean="saved_mean", - SavedVariance="saved_variance", - # attrs - is_test=False, - data_layout=data_layout, - momentum=momentum, - epsilon=epsilon) - - batch_norm_op.run(scope, place) - - # check forward result - self.__assert_close(y_tensor, y_out, "y_out") - self.__assert_close(saved_mean_tensor, saved_mean, "saved_mean") - self.__assert_close(saved_variance_tensor, saved_variance, - "saved_variance") - self.__assert_close(mean_out_tensor, mean_out, "mean_out") - if isinstance(place, core.CUDAPlace): - atol = 5e-2 - else: - atol = 1e-4 - self.__assert_close(variance_out_tensor, variance_out, - "variance_out", atol) - print "op test forward passed: ", str(place), data_layout - # run backward - batch_norm_op_grad = get_backward_op(scope, batch_norm_op, set()) - set_output_grad( - scope, - ["y_out", "mean", "variance", "saved_mean", "saved_variance"], - place, - feed_dict={"y_out": y_grad}) - batch_norm_op_grad.run(scope, place) - - x_grad_tensor = create_or_get_tensor(scope, - grad_var_name("x_val"), None, - place) - scale_grad_tensor = create_or_get_tensor(scope, - grad_var_name("scale_val"), - None, place) - bias_grad_tensor = create_or_get_tensor(scope, - grad_var_name("bias_val"), - None, place) + y_grad = np.random.random_sample(shape).astype(np.float32) + x_grad, scale_grad, bias_grad = _reference_grad( + x, y_grad, scale, saved_mean, var_ref, epsilon, data_format) + + var_dict = locals() + var_dict['y@GRAD'] = y_grad + + var_names = [ + 'x', 'scale', 'bias', 'mean', 'variance', 'y', 'saved_mean', + 'saved_variance' + ] + ground_truth = {name: var_dict[name] for name in var_names} + + program = fluid.Program() + with fluid.program_guard(program): + block = program.global_block() + for name in ground_truth: + block.create_var( + name=name, + dtype='float32', + shape=ground_truth[name].shape) + bn_op = block.append_op( + type="batch_norm", + inputs={ + "X": block.var('x'), + "Scale": block.var('scale'), + "Bias": block.var('bias'), + "Mean": block.var('mean'), + "Variance": block.var('variance') + }, + outputs={ + "Y": block.var('y'), + "MeanOut": block.var('mean'), # share the same memory + "VarianceOut": + block.var('variance'), # share the same memory + "SavedMean": block.var('saved_mean'), + "SavedVariance": block.var('saved_variance') + }, + attrs={ + "momentum": momentum, + "epsilon": epsilon, + "is_test": False, + "data_layout": data_layout + }) + 
block.create_var(name='y@GRAD', dtype='float32', shape=y.shape) + + # generate backward op_desc + grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc( + bn_op.desc, set(), []) + grad_op_desc = grad_op_desc_list[0] + new_op_desc = block.desc.append_op() + new_op_desc.copy_from(grad_op_desc) + for var_name in grad_op_desc.output_arg_names(): + block.desc.var(var_name.encode("ascii")) + grad_op_desc.infer_var_type(block.desc) + grad_op_desc.infer_shape(block.desc) + for arg in grad_op_desc.output_arg_names(): + grad_var = block.desc.find_var(arg.encode("ascii")) + grad_var.set_dtype(core.VarDesc.VarType.FP32) + + exe = fluid.Executor(place) + out = exe.run( + program, + feed={ + name: var_dict[name] + for name in + ['x', 'scale', 'bias', 'mean', 'variance', 'y@GRAD'] + }, + fetch_list=[ + 'y', 'mean', 'variance', 'saved_mean', 'saved_variance', + 'x@GRAD', 'scale@GRAD', 'bias@GRAD' + ]) + + self.__assert_close(y, out[0], "y") + self.__assert_close(mean_out, out[1], "mean") + self.__assert_close(variance_out, out[2], "variance", 1e-3) + self.__assert_close(saved_mean, out[3], "saved_mean") + self.__assert_close(saved_variance, out[4], "saved_variance", 1e-3) + self.__assert_close(x_grad, out[5], "x_grad") + self.__assert_close(scale_grad, out[6], "scale_grad") + self.__assert_close(bias_grad, out[7], "bias_grad") - # check gradient output - self.__assert_close(x_grad_tensor, x_grad_ref, "x_grad") - self.__assert_close(scale_grad_tensor, scale_grad_ref, "scale_grad") - self.__assert_close(bias_grad_tensor, bias_grad_ref, "bias_grad") - print "op test backward passed: ", str(place), data_layout + print "op test forward passed: ", str(place), data_layout places = [core.CPUPlace()] if core.is_compiled_with_cuda() and core.op_support_gpu("batch_norm"): @@ -537,7 +393,6 @@ class TestBatchNormOpTraining(OpTest): for place in places: for data_format in ["NCHW", "NHWC"]: test_with_place(place, data_format, [2, 3, 4, 5]) - test_with_place(place, data_format, [2, 3]) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_cond_op.py b/python/paddle/fluid/tests/unittests/test_cond_op.py deleted file mode 100644 index 66fbae961a..0000000000 --- a/python/paddle/fluid/tests/unittests/test_cond_op.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import paddle.fluid.core as core -import unittest -import numpy as np -from paddle.fluid.op import Operator, CondOp - - -class PySimpleCond(object): - ''' - A simple implementation of dynamic if-else based on numpy - ''' - - def __init__(self): - array = [1] * 10 - for i in range(1, 10, 2): - array[i] = 0 - self.cond = np.array(array) - self.x = np.ones(shape=(10, 1)).astype("float32") - - def forward(self): - self.index_t = np.where(self.cond == 1) - self.index_f = np.where(self.cond == 0) - y_t = self.x[self.index_t] - y_f = self.x[self.index_f] - y_t = y_t * 2. - y_f = y_f * (-2.) 
- output = np.zeros(shape=(10, 1)) - output[self.index_t] = y_t - output[self.index_f] = y_f - return output - - -class PySimpleCondTest(unittest.TestCase): - def setUp(self): - self.condnn = PySimpleCond() - - def test_forward(self): - output = self.condnn.forward() - - -def create_tensor(scope, name, shape, np_data): - tensor = scope.var(name).get_tensor() - tensor.set_dims(shape) - tensor.set(np_data, core.CPUPlace()) - return tensor - - -class TestCondOp(unittest.TestCase): - ''' - Test CondOp - - equation: - cond = [True, False, True, False, ...] - y[index_t] = x[index_t] * 2. - y[index_f] = x[index_f] * -2. - outputs: - y - ''' - - def setUp(self): - self.py_cond = PySimpleCond() - - def forward(self): - self.scope = core.Scope() - self.create_global_variables() - self.create_cond_op() - self.create_sub_net() - self.condop.run(self.scope, core.CPUPlace()) - return np.array(self.scope.find_var("Out").get_tensor()) - - def create_global_variables(self): - x_np_data = self.py_cond.x - create_tensor(self.scope, "X", [10, 1], x_np_data) - cond_np_data = self.py_cond.cond.astype("int32") - create_tensor(self.scope, "cond", [10, 1], cond_np_data) - self.scope.var("SubScopes") - self.scope.var("IndexTensors") - self.scope.var("Out") - - def create_cond_op(self): - self.condop = CondOp( - Cond="cond", - Xs=["X"], - Outs=["Out"], - SubScopes="SubScopes", - IndexTensors="IndexTensors") - - def create_sub_net(self): - truenet = core.Net.create() - scale_op_t = Operator("scale", X='X', Out='Out', scale=2.) - truenet.append_op(scale_op_t) - truenet.complete_add_op(True) - self.condop.set_truenet(truenet) - - falsenet = core.Net.create() - scale_op_t = Operator("scale", X='X', Out='Out', scale=-2.) - falsenet.append_op(scale_op_t) - falsenet.complete_add_op(True) - self.condop.set_falsenet(falsenet) - - def test_forward(self): - print 'test cond op forward' - pd_output = self.forward() - py_output = self.py_cond.forward() - print 'pd_output', pd_output - print - print 'py_output', py_output - self.assertEqual(pd_output.shape, py_output.shape) - print 'test passed' - return 0 - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_layer_norm_op.py b/python/paddle/fluid/tests/unittests/test_layer_norm_op.py index 8c67e45b7f..69365db4d1 100644 --- a/python/paddle/fluid/tests/unittests/test_layer_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_layer_norm_op.py @@ -15,10 +15,8 @@ import unittest import numpy as np from operator import mul -from op_test import OpTest import paddle.fluid.core as core -from paddle.fluid.op import Operator -from paddle.fluid.framework import grad_var_name +import paddle.fluid as fluid np.random.random(123) @@ -70,161 +68,93 @@ def _reference_layer_norm_grad(x, grad_y, scale, mean, var, begin_norm_axis=1): return grad_x, d_scale, d_bias -def get_backward_op(scope, op, no_grad_set): - backward_op = core.Operator.backward(op, no_grad_set) - for input in backward_op.input_vars(): - var = scope.var(input) - var.get_tensor() - for output in backward_op.output_vars(): - var = scope.var(output) - var.get_tensor() - return backward_op - - -def create_or_get_tensor(scope, var_name, var, place): - tensor = scope.var(var_name).get_tensor() - if var is not None: - assert isinstance(var, np.ndarray) - tensor.set_lod([[]]) - tensor.set_dims(var.shape) - tensor.set(var, place) - return tensor - - -def set_output_grad(scope, outputs, place, feed_dict=None): - def __set_tensor__(name, data=None): - out_tensor = 
scope.find_var(name).get_tensor() - grad_tensor = scope.var(grad_var_name(name)).get_tensor() - out_dtype = out_tensor.dtype() - if data is None: - if out_dtype == core.VarDesc.VarType.FP64: - data = np.ones(out_tensor.shape(), dtype=np.float64) - elif out_dtype == core.VarDesc.VarType.FP32: - data = np.ones(out_tensor.shape(), dtype=np.float32) - else: - raise ValueError("Not supported data type " + str(out_dtype)) - grad_tensor.set(data, place) - - for output in outputs: - data = None - if output in feed_dict: - data = feed_dict[output] - __set_tensor__(output, data) - - -class TestLayerNormdOp(OpTest): +class TestLayerNormdOp(unittest.TestCase): def __assert_close(self, tensor, np_array, msg, atol=1e-4): self.assertTrue(np.allclose(np.array(tensor), np_array, atol=atol), msg) - def __assert_grad_close(self, - tensor, - np_array, - name, - place, - max_relative_error=0.02): - a = np.array(tensor) - b = np_array - abs_a = np.abs(a) - abs_a[abs_a < 1e-5] = 1 - - diff_mat = np.abs(a - b) / abs_a - max_diff = np.max(diff_mat) - - def err_msg(): - offset = np.argmax(diff_mat > max_relative_error) - return ("%s Variable %s max gradient diff %f over limit %f, " - "the first error element is %d, %f, %f") % ( - "Gradient Check On %s" % str(place), name, max_diff, - max_relative_error, offset, a.flatten()[offset], - b.flatten()[offset]) - - self.assertLessEqual(max_diff, max_relative_error, err_msg()) - def check_forward_backward(self, shape, begin_norm_axis): - def test_with_place(place, shape, begin_norm_axis=1): - # setUp - assert begin_norm_axis > 0 and begin_norm_axis < len( - shape), 'begin_norm_axis must be between 0 and len(shape)-1.' + def test_with_place(place, shape, begin_norm_axis): # attr epsilon = 0.00001 x_shape = shape D = reduce(mul, x_shape[begin_norm_axis:len(x_shape)], 1) scale_shape = [D] - x_val = np.random.random_sample(x_shape).astype(np.float32) - scale_val = np.random.random_sample(scale_shape).astype(np.float32) - bias_val = np.random.random_sample(scale_shape).astype(np.float32) + np.random.seed(123) + x = np.random.random_sample(x_shape).astype(np.float32) + scale = np.random.random_sample(scale_shape).astype(np.float32) + bias = np.random.random_sample(scale_shape).astype(np.float32) y_grad = np.random.random_sample(x_shape).astype(np.float32) - # run forward - y_out, saved_mean, var_ref = _reference_layer_norm_naive( - x_val, scale_val, bias_val, epsilon, begin_norm_axis) - naive_fw = {"Y": y_out, "Mean": saved_mean, "Variance": var_ref} - - # get gradient - x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_layer_norm_grad( - x_val, y_grad, scale_val, saved_mean, var_ref, begin_norm_axis) - naive_grad = { - "X": x_grad_ref, - "Scale": scale_grad_ref, - "Bias": bias_grad_ref - } - - scope = core.Scope() - - # create input - input_map = {"X": x_val, "Scale": scale_val, "Bias": bias_val} - for i_name in input_map: - create_or_get_tensor(scope, i_name, input_map[i_name], place) - - # create output - output_map = {"Y": None, "Mean": None, "Variance": None} - output_tensor = {} - for o_name in output_map: - output_tensor[o_name] = create_or_get_tensor( - scope, o_name, output_map[o_name], place) - - layer_norm_op = Operator( - "layer_norm", - # inputs - X="X", - Scale="Scale", - Bias="Bias", - # outputs - Y="Y", - Mean="Mean", - Variance="Variance", - # attrs - epsilon=epsilon, - begin_norm_axis=begin_norm_axis) - - layer_norm_op.run(scope, place) - - # check forward result - atol = 5e-2 if isinstance(place, core.CUDAPlace) else 1e-4 - for o_tensor in output_tensor: 
- self.__assert_close(output_tensor[o_tensor], naive_fw[o_tensor], - o_tensor, atol) - - # run backward - layer_norm_op_grad = get_backward_op(scope, layer_norm_op, set()) - set_output_grad( - scope, ["Y", "Mean", "Variance"], - place, - feed_dict={"Y": y_grad}) - layer_norm_op_grad.run(scope, place) - - # get output - grad_tensor = {} - for o_name in naive_grad: - grad_tensor[o_name] = x_ = create_or_get_tensor( - scope, grad_var_name(o_name), None, place) - - # check gradient output - for o_grad in naive_grad: - self.__assert_grad_close(grad_tensor[o_grad], - naive_grad[o_grad], o_grad + "@GRAD", - place) + # reference forward & backward + y, mean, variance = _reference_layer_norm_naive( + x, scale, bias, epsilon, begin_norm_axis) + x_grad, scale_grad, bias_grad = _reference_layer_norm_grad( + x, y_grad, scale, mean, variance, begin_norm_axis) + + var_dict = locals() + var_dict['y@GRAD'] = y_grad + var_names = [ + 'x', 'scale', 'bias', 'mean', 'variance', 'y', 'y@GRAD' + ] + ground_truth = {name: var_dict[name] for name in var_names} + + program = fluid.Program() + with fluid.program_guard(program): + block = program.global_block() + for name in ground_truth: + block.create_var( + name=name, + dtype='float32', + shape=ground_truth[name].shape) + layer_norm_op = block.append_op( + type="layer_norm", + inputs={ + "X": block.var('x'), + "Scale": block.var('scale'), + "Bias": block.var('bias'), + }, + outputs={ + "Y": block.var('y'), + "Mean": block.var('mean'), # share the same memory + "Variance": + block.var('variance'), # share the same memory + }, + attrs={ + "epsilon": epsilon, + "begin_norm_axis": begin_norm_axis + }) + + # generate backward op_desc + grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc( + layer_norm_op.desc, set(), []) + grad_op_desc = grad_op_desc_list[0] + new_op_desc = block.desc.append_op() + new_op_desc.copy_from(grad_op_desc) + for var_name in grad_op_desc.output_arg_names(): + block.desc.var(var_name.encode("ascii")) + grad_op_desc.infer_var_type(block.desc) + grad_op_desc.infer_shape(block.desc) + for arg in grad_op_desc.output_arg_names(): + grad_var = block.desc.find_var(arg.encode("ascii")) + grad_var.set_dtype(core.VarDesc.VarType.FP32) + + exe = fluid.Executor(place) + out = exe.run(program, + feed={ + name: var_dict[name] + for name in ['x', 'scale', 'bias', 'y@GRAD'] + }, + fetch_list=[ + 'y', 'mean', 'variance', 'x@GRAD', + 'scale@GRAD', 'bias@GRAD' + ]) + self.__assert_close(y, out[0], "y") + self.__assert_close(mean, out[1], "mean") + self.__assert_close(variance, out[2], "variance", 1e-3) + self.__assert_close(x_grad, out[3], "x_grad") + self.__assert_close(scale_grad, out[4], "scale_grad", 1e-3) + self.__assert_close(bias_grad, out[5], "bias_grad") places = [core.CPUPlace()] if core.is_compiled_with_cuda() and core.op_support_gpu("layer_norm"): @@ -237,15 +167,6 @@ class TestLayerNormdOp(OpTest): self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=1) self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=3) - def test_check_forward_backward_with_scale(self): - pass # TODO(zcd) - - def test_check_forward_backward_with_bias(self): - pass # TODO(zcd) - - def test_check_forward_backward(self): - pass # TODO(zcd) - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_net.py b/python/paddle/fluid/tests/unittests/test_net.py deleted file mode 100644 index ae1699d647..0000000000 --- a/python/paddle/fluid/tests/unittests/test_net.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2018 
PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.fluid.core as core -from paddle.fluid.op import Operator -import unittest - - -def fc(X, W, Y): - ret_v = core.Net.create() - - ret_v.append_op(Operator("mul", X="X", Y="W", Out="pre_activation")) - ret_v.append_op(Operator("sigmoid", X="pre_activation", Out=Y)) - ret_v.complete_add_op(True) - return ret_v - - -class TestNet(unittest.TestCase): - def test_net_all(self): - net = core.Net.create() - op1 = Operator("sum", X=["X", "Y"], Out="Out") - net.append_op(op1) - - net2 = core.Net.create() - net2.append_op(fc(X="X", W="w", Y="fc.out")) - net2.complete_add_op(True) - net.append_op(net2) - net.complete_add_op(True) - - expected = ''' -Op(plain_net), inputs:{all[W, X, Y]}, outputs:{all[Out, fc.out, pre_activation]}. - Op(sum), inputs:{X[X, Y]}, outputs:{Out[Out]}. - Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}. - Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}. - Op(mul), inputs:{X[X], Y[W]}, outputs:{Out[pre_activation]}. - Op(sigmoid), inputs:{X[pre_activation]}, outputs:{Out[fc.out]}. -''' - self.assertEqual(expected, "\n" + str(net)) - - -if __name__ == "__main__": - unittest.main() -- GitLab
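
Note on the rewritten unit tests: the updated test_batch_norm_op.py and test_layer_norm_op.py above share one pattern. Instead of calling the removed get_backward_op helper (which relied on core.Operator.backward), they build a fluid.Program, append the forward operator to its global block, derive the gradient op description with core.get_grad_op_desc, copy it into the block, and run both the forward and backward ops through a fluid.Executor. The following is a minimal, hypothetical sketch of that pattern, not part of the patch: it uses the scale operator (out = scale * x) purely for illustration, assumes the 2018-era paddle.fluid Python 2 API that these tests target, and the variable names x, out, out@GRAD are placeholders.

    import numpy as np
    import paddle.fluid as fluid
    import paddle.fluid.core as core

    place = core.CPUPlace()
    shape = [4, 8]
    x_np = np.random.random_sample(shape).astype(np.float32)
    out_grad_np = np.random.random_sample(shape).astype(np.float32)

    program = fluid.Program()
    with fluid.program_guard(program):
        block = program.global_block()
        # forward variables plus the fed gradient of the output
        for name in ['x', 'out', 'out@GRAD']:
            block.create_var(name=name, dtype='float32', shape=shape)
        # forward op: out = 2.0 * x
        scale_op = block.append_op(
            type='scale',
            inputs={'X': block.var('x')},
            outputs={'Out': block.var('out')},
            attrs={'scale': 2.0})

        # generate the backward op desc from the forward op, as the tests do
        grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(scale_op.desc,
                                                                  set(), [])
        grad_op_desc = grad_op_desc_list[0]
        new_op_desc = block.desc.append_op()
        new_op_desc.copy_from(grad_op_desc)
        for var_name in grad_op_desc.output_arg_names():
            block.desc.var(var_name.encode('ascii'))
        grad_op_desc.infer_var_type(block.desc)
        grad_op_desc.infer_shape(block.desc)
        for arg in grad_op_desc.output_arg_names():
            grad_var = block.desc.find_var(arg.encode('ascii'))
            grad_var.set_dtype(core.VarDesc.VarType.FP32)

    exe = fluid.Executor(place)
    out_np, x_grad_np = exe.run(program,
                                feed={'x': x_np, 'out@GRAD': out_grad_np},
                                fetch_list=['out', 'x@GRAD'])
    # out_np should equal 2.0 * x_np and x_grad_np should equal 2.0 * out_grad_np

Feeding out@GRAD and fetching x@GRAD here mirrors how the rewritten tests feed y@GRAD and compare the executor results against their numpy reference forward and gradient implementations.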