diff --git a/doc/getstarted/build_and_install/docker_install_cn.rst b/doc/getstarted/build_and_install/docker_install_cn.rst
index 02b96bb413156786db6dc77696c5640b97c10aa4..84e33177740ca1652efc09c8081c2519b4366906 100644
--- a/doc/getstarted/build_and_install/docker_install_cn.rst
+++ b/doc/getstarted/build_and_install/docker_install_cn.rst
@@ -74,13 +74,13 @@ PaddlePaddle发布新版本的时候都会发布对应版本的生产镜像以
 
    .. code-block:: bash
 
-      docker run -it --rm paddlepaddle/paddle:0.10.0-dev /bin/bash
+      docker run -it --rm -v $(pwd):/paddle paddlepaddle/paddle:0.10.0-dev /bin/bash
 
 或者,可以以后台进程方式运行容器:
 
    .. code-block:: bash
 
-      docker run -d -p 2202:22 -p 8888:8888 paddledev/paddle:0.10.0-dev
+      docker run -d -p 2202:22 -p 8888:8888 -v $(pwd):/paddle paddlepaddle/paddle:0.10.0-dev /usr/sbin/sshd -D
 
 然后用密码 :code:`root` SSH进入容器:
 
diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt
index 9e98afb3119856776ef80e502379460b8c019d40..03985260241689a099ae9ebc136bd04831a44167 100644
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -7,7 +7,7 @@ cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context)
 cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)
 cc_test(eigen_test SRCS eigen_test.cc DEPS tensor)
 
-cc_library(lod_tensor SRCS lod_tensor.cc details/lod_tensor.cc DEPS ddim place tensor)
+cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor)
 cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor)
 
 cc_test(variable_test SRCS variable_test.cc)
@@ -15,23 +15,19 @@ cc_test(variable_test SRCS variable_test.cc)
 cc_library(scope SRCS scope.cc)
 cc_test(scope_test SRCS scope_test.cc DEPS scope)
 
-proto_library(attribute_proto SRCS attribute.proto)
-proto_library(op_proto SRCS op_proto.proto DEPS attribute_proto)
-proto_library(op_desc SRCS op_desc.proto DEPS attribute_proto)
-cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf)
-cc_test(op_desc_test SRCS op_desc_test.cc DEPS op_desc protobuf)
+proto_library(framework_proto SRCS framework.proto)
 
-cc_library(attribute SRCS attribute.cc DEPS op_desc op_proto)
+cc_library(attribute SRCS attribute.cc DEPS framework_proto)
 
-cc_library(operator SRCS operator.cc DEPS op_desc device_context tensor scope attribute)
+cc_library(operator SRCS operator.cc DEPS framework_proto device_context tensor scope attribute)
 cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry)
 
-cc_library(grad_op_builder SRCS grad_op_builder.cc DEPS op_proto operator)
-cc_library(op_registry SRCS op_registry.cc DEPS op_desc grad_op_builder)
+cc_library(grad_op_builder SRCS grad_op_builder.cc DEPS operator)
+cc_library(op_registry SRCS op_registry.cc DEPS grad_op_builder)
 cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry)
 cc_test(grad_op_builder_test SRCS grad_op_builder_test.cc DEPS grad_op_builder op_registry add_op)
 
-py_proto_compile(framework_py_proto SRCS attribute.proto op_proto.proto op_desc.proto)
+py_proto_compile(framework_py_proto SRCS framework.proto)
 # Generate an empty __init__.py to make framework_py_proto as a valid python module.
 add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
 add_dependencies(framework_py_proto framework_py_proto_init)
diff --git a/paddle/framework/attribute.cc b/paddle/framework/attribute.cc
index 4c5790693b7e48396e945d09f4fdc72b86aa5978..9eb07acdff1d00dd926f1cee9c24f9f151006d7e 100644
--- a/paddle/framework/attribute.cc
+++ b/paddle/framework/attribute.cc
@@ -44,7 +44,7 @@ AttrType AttrTypeID<std::vector<std::string>>() { return STRINGS; }
 
-Attribute GetAttrValue(const AttrDesc& attr_desc) {
+Attribute GetAttrValue(const OpDesc::Attr& attr_desc) {
   switch (attr_desc.type()) {
     case paddle::framework::AttrType::INT: {
       return attr_desc.i();
diff --git a/paddle/framework/attribute.h b/paddle/framework/attribute.h
index 49a62bedb6aadab5ff05d8aa7dda42fe983314a0..08b47cabd4c2225c50022bd35734dcc2663324d6 100644
--- a/paddle/framework/attribute.h
+++ b/paddle/framework/attribute.h
@@ -20,8 +20,7 @@ limitations under the License. */
 #include
 #include
 
-#include "paddle/framework/attribute.pb.h"
-#include "paddle/framework/op_desc.pb.h"
+#include "paddle/framework/framework.pb.h"
 #include "paddle/platform/enforce.h"
 #include "paddle/platform/variant.h"
@@ -37,7 +36,7 @@ typedef std::unordered_map<std::string, Attribute> AttributeMap;
 template <typename T>
 AttrType AttrTypeID();
 
-Attribute GetAttrValue(const AttrDesc& attr_desc);
+Attribute GetAttrValue(const OpDesc::Attr& attr_desc);
 
 // check whether a value(attribute) fit a certain limit
 template <typename T>
diff --git a/paddle/framework/attribute.proto b/paddle/framework/attribute.proto
deleted file mode 100644
index 13ae312c10e934566384b8bd0f41dacd6c01fc2f..0000000000000000000000000000000000000000
--- a/paddle/framework/attribute.proto
+++ /dev/null
@@ -1,28 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-syntax = "proto2";
-package paddle.framework;
-
-// Attribute Type for paddle's Op.
-// Op contains many attributes. Each type of attributes could be different.
-// The AttrType will be shared between AttrDesc and AttrProto.
-enum AttrType {
-  INT = 0;
-  FLOAT = 1;
-  STRING = 2;
-  INTS = 3;
-  FLOATS = 4;
-  STRINGS = 5;
-}
\ No newline at end of file
diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc
index 437a44a8aafa650d654a1a77c60613abe07679fe..315bdde76d3ffe57b656aa69688def6d274f592c 100644
--- a/paddle/framework/backward.cc
+++ b/paddle/framework/backward.cc
@@ -21,15 +21,24 @@ namespace paddle {
 namespace framework {
 
-static bool AllInSet(const std::vector<std::string>& names,
-                     const std::string& suffix,
-                     const std::unordered_set<std::string>& set) {
+template <typename Map, typename T>
+static void ForEachVarName(Map& names, T callback) {
   for (auto& name : names) {
-    if (set.find(name + suffix) == set.end()) {
-      return false;
+    for (auto& n : name.second) {
+      if (callback(n)) return;
     }
   }
-  return true;
+}
+
+static bool AllInSet(
+    const std::map<std::string, std::vector<std::string>>& names,
+    const std::string& suffix, const std::unordered_set<std::string>& set) {
+  bool all_in_set = true;
+  ForEachVarName(names, [&all_in_set, &set, &suffix](const std::string& n) {
+    all_in_set = set.find(n + suffix) != set.end();
+    return !all_in_set;
+  });
+  return all_in_set;
 }
 
 static std::shared_ptr<OperatorBase> NOP() {
@@ -68,10 +77,11 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
   // Then all input gradients cannot be computed at all, and we put them into
   // `no_grad_names` set. Return an NOP.
   if (AllInSet(forwardOp.outputs_, kGradVarSuffix, no_grad_names)) {
-    for (auto& name : forwardOp.inputs_) {
-      // Mark all input is not need
-      no_grad_names.insert(name + kGradVarSuffix);
-    }
+    ForEachVarName(forwardOp.inputs_,
+                   [&no_grad_names](const std::string& name) -> bool {
+                     no_grad_names.insert(GradVarName(name));
+                     return false;
+                   });
     return NOP();
   }
 
@@ -93,9 +103,11 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
     auto fwd = *it;
     auto bwd = BackwardRecursive(*fwd, no_grad_names, uniq_id);
     net->AddOp(bwd);
-    for (auto& out : bwd->outputs_) {
-      dup_output_ops[out].emplace_back(local_op_id);
-    }
+    ForEachVarName(bwd->outputs_,
+                   [&dup_output_ops, local_op_id](const std::string& out) {
+                     dup_output_ops[out].emplace_back(local_op_id);
+                     return false;
+                   });
   }
   // Get unique ID for this method.
   auto uid = uniq_id++;
@@ -117,7 +129,7 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
       insert_position.push_back(
           {dup_op.back(),
            OpRegistry::CreateOp(
-               "add", {dup_outputs}, {name},
+               "add", {{"X", {dup_outputs}}}, {{"Out", {name}}},
                {{"input_format",
                  std::vector<int>{0, static_cast<int>(dup_outputs.size())}}})});
     }
@@ -131,7 +143,9 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
   } else {
     std::shared_ptr<OperatorBase> grad_op = OpRegistry::CreateGradOp(forwardOp);
-    for (std::string& grad_input : grad_op->inputs_) {
+
+    ForEachVarName(grad_op->inputs_, [&no_grad_names,
+                                      &net](std::string& grad_input) {
       if (no_grad_names.count(grad_input)) {
         // +1 for \0
         std::string prefix = grad_input.substr(
@@ -140,16 +154,19 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
         // If part of input gradient of that operator is not calculated, fill
         // zero variables to that input gradient.
-        net->AddOp(OpRegistry::CreateOp("fill_zeros_like", {prefix},
-                                        {grad_input}, {}));
+        net->AddOp(OpRegistry::CreateOp("fill_zeros_like", {{"Src", {prefix}}},
+                                        {{"Dst", {grad_input}}}, {}));
       }
-    }
-
-    for (std::string& grad_output : grad_op->outputs_) {
-      if (no_grad_names.count(grad_output)) {
-        grad_output = kEmptyVarName;
-      }
-    }
+      return false;
+    });
+
+    ForEachVarName(grad_op->outputs_,
+                   [&no_grad_names](std::string& grad_output) {
+                     if (no_grad_names.count(grad_output)) {
+                       grad_output = kEmptyVarName;
+                     }
+                     return false;
+                   });
 
     if (net->ops_.empty()) {  // Current no aux op is added to network
       return grad_op;
diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc
index da3b9c8bed7cd123f2f8ef982a5f0e23abcc0ec7..ebe52d5f284a8d271b666483001544a805d598ac 100644
--- a/paddle/framework/backward_test.cc
+++ b/paddle/framework/backward_test.cc
@@ -30,8 +30,7 @@ using DeviceContext = platform::DeviceContext;
 
 class EmptyOp : public OperatorBase {
  public:
-  DEFINE_OPERATOR_CTOR(EmptyOp, OperatorBase)
-
+  using OperatorBase::OperatorBase;
   void InferShape(const Scope &scope) const override {}
   void Run(const Scope &scope, const DeviceContext &dev_ctx) const override {}
 };
@@ -40,9 +39,9 @@ class RowWiseAddOpMaker : public OpProtoAndCheckerMaker {
  public:
   RowWiseAddOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "Input X of Add").IgnoreGradient();
-    AddInput("b", "Bias of Add").IgnoreGradient();
-    AddOutput("Out", "Out of Add").IgnoreGradient();
+    AddInput("X", "Input X of Add").AsNoGradient();
+    AddInput("b", "Bias of Add").AsNoGradient();
+    AddOutput("Out", "Out of Add").AsNoGradient();
     AddComment("Add Op");
   }
 };
@@ -51,8 +50,8 @@ class MulOpMaker : public OpProtoAndCheckerMaker {
  public:
   MulOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("A", "A");
-    AddInput("B", "B");
+    AddInput("X", "A");
+    AddInput("Y", "B");
     AddOutput("Out", "Out");
     AddComment("Mul");
   }
@@ -63,7 +62,7 @@ class SigmoidOpMaker : public OpProtoAndCheckerMaker {
   SigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "X");
-    AddOutput("Y", "Y");
+    AddOutput("Out", "Y");
     AddComment("Sigmoid");
   }
 };
@@ -73,21 +72,25 @@ class NoGradOpMaker : public OpProtoAndCheckerMaker {
   NoGradOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "X input");
-    AddOutput("Y", "Y output");
+    AddOutput("Out", "Y output");
     AddComment("NoGradOp, same input output. no Grad");
no Grad"); } }; class FcOp : public operators::NetOp { public: - void Init() override { - AddOp(OpRegistry::CreateOp("mul", {Input("X"), Input("W")}, - {Output("mul_result")}, {})); - auto b_name = Input("b"); + FcOp(const std::string &type, const VarNameMap &inputs, + const VarNameMap &outputs, const AttributeMap &attrs) + : NetOp(type, inputs, outputs, attrs) { + AddOp(OpRegistry::CreateOp("mul", + {{"X", {Input("X")}}, {"Y", {Input("W")}}}, + {{"Out", {Output("mul_result")}}}, {})); + auto input_b = Inputs("b"); std::string before_act = "mul_result"; - if (b_name != kEmptyVarName) { - AddOp(OpRegistry::CreateOp("rowwise_add", {Output("mul_result"), b_name}, - {Output("add_result")}, {})); + if (input_b.size() != 0) { + AddOp(OpRegistry::CreateOp( + "rowwise_add", {{"X", {Output("mul_result")}}, {"b", {input_b[0]}}}, + {{"Out", {Output("add_result")}}}, {})); before_act = "add_result"; } else { auto out_varname = Output("add_result"); @@ -96,8 +99,8 @@ class FcOp : public operators::NetOp { } } - AddOp(OpRegistry::CreateOp("sigmoid", {Output(before_act)}, {Output("Out")}, - {})); + AddOp(OpRegistry::CreateOp("sigmoid", {{"X", {Output(before_act)}}}, + {{"Out", {Output("Out")}}}, {})); CompleteAddOp(false); } }; @@ -109,8 +112,8 @@ class FcOpMaker : public OpProtoAndCheckerMaker { AddInput("X", "x"); AddInput("W", "w"); AddInput("b", "b"); - AddOutput("mul_result", "").SetTemporary(); - AddOutput("add_result", "").SetTemporary(); + AddOutput("mul_result", "").AsIntermediate(); + AddOutput("add_result", "").AsIntermediate(); AddOutput("Out", ""); AddComment(""); } @@ -141,7 +144,7 @@ class AddOpMaker : public OpProtoAndCheckerMaker { public: AddOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "x").SetMultiple(); + AddInput("X", "x").AsDuplicable(); AddOutput("Y", "y"); AddComment(""); } @@ -167,27 +170,24 @@ REGISTER_OP(many_output_op, f::EmptyOp, f::ManyOutputOpMaker); REGISTER_GRADIENT_OP(many_output_op, many_output_op_grad, f::EmptyOp); TEST(Backward, simple_op_grad) { - auto fwd = f::OpRegistry::CreateOp("rowwise_add", {"X", "b"}, {"Out"}, {}); + auto fwd = f::OpRegistry::CreateOp( + "rowwise_add", {{"X", {"x"}}, {"b", {"b"}}}, {{"Out", {"out"}}}, {}); ASSERT_NE(fwd, nullptr); auto gop = f::OpRegistry::CreateGradOp(*fwd); - ASSERT_EQ(4UL, gop->inputs_.size()); - ASSERT_EQ(f::kEmptyVarName, gop->inputs_[0]); + ASSERT_EQ(1UL, gop->inputs_.size()); ASSERT_EQ("rowwise_add_grad", gop->type_); - ASSERT_EQ(f::GradVarName("X"), gop->outputs_[0]); - ASSERT_EQ(f::GradVarName("b"), gop->outputs_[1]); - - ASSERT_EQ(f::GradVarName("X"), gop->Output(f::GradVarName("X"))); + ASSERT_EQ(f::GradVarName("x"), gop->Output(f::GradVarName("X"))); + ASSERT_EQ(f::GradVarName("b"), gop->Output(f::GradVarName("b"))); } TEST(Backward, simple_op_not_need_grad) { - auto fwd = f::OpRegistry::CreateOp("rowwise_add", {"X", "b"}, {"Out"}, {}); + auto fwd = f::OpRegistry::CreateOp( + "rowwise_add", {{"X", {"x"}}, {"b", {"b"}}}, {{"Out", {"out"}}}, {}); ASSERT_NE(fwd, nullptr); - auto gop = f::Backward(*fwd, {"X"}); - ASSERT_EQ(std::find(gop->outputs_.begin(), gop->outputs_.end(), - f::GradVarName("X")), - gop->outputs_.end()); + auto gop = f::Backward(*fwd, {"x"}); + ASSERT_EQ(gop->Output(f::GradVarName("X")), f::kEmptyVarName); - auto no_input_gop = f::Backward(*fwd, {"X", "b"}); + auto no_input_gop = f::Backward(*fwd, {"x", "b"}); ASSERT_NE(no_input_gop, nullptr); ASSERT_TRUE(no_input_gop->IsNetOp()); ASSERT_EQ(0UL, @@ -195,8 +195,12 @@ TEST(Backward, 
 }
 
 TEST(Backward, net_fc_backward_normal) {
-  std::shared_ptr<OperatorBase> fwd = f::OpRegistry::CreateOp(
-      "fc", {"X", "w", "b"}, {"mul_result", "add_result", "out"}, {});
+  std::shared_ptr<OperatorBase> fwd =
+      f::OpRegistry::CreateOp("fc", {{"X", {"x"}}, {"W", {"w"}}, {"b", {"b"}}},
+                              {{"mul_result", {"mul_res"}},
+                               {"add_result", {"add_re"}},
+                               {"Out", {"out"}}},
+                              {});
   ASSERT_NE(fwd, nullptr);
   std::shared_ptr<OperatorBase> gop = f::Backward(*fwd, {});
   ASSERT_TRUE(gop->IsNetOp());
@@ -218,8 +222,11 @@ TEST(Backward, net_fc_backward_normal) {
 
 TEST(Backward, net_fc_backward_not_have_b) {
   std::shared_ptr<OperatorBase> fwd =
-      f::OpRegistry::CreateOp("fc", {"X", "w", f::kEmptyVarName},
-                              {"mul_result", "add_result", "tmp"}, {});
+      f::OpRegistry::CreateOp("fc", {{"X", {"x"}}, {"W", {"w"}}, {"b", {}}},
+                              {{"mul_result", {"mul_res"}},
+                               {"add_result", {"add_res"}},
+                               {"Out", {"tmp"}}},
+                              {});
   ASSERT_NE(fwd, nullptr);
   std::shared_ptr<OperatorBase> gop = f::Backward(*fwd, {});
   ASSERT_TRUE(gop->IsNetOp());
@@ -238,38 +245,49 @@ TEST(Backward, net_fc_backward_not_have_b) {
 
 TEST(Backward, net_input_of_network_not_need_grad) {
   ops::NetOp net;
-  net.AddOp(f::OpRegistry::CreateOp("fc", {"X", "W1", "b1"},
-                                    {"mul_tmp_0", "add_tmp_0", "hidden0"}, {}));
-  net.AddOp(f::OpRegistry::CreateOp("fc", {"hidden0", "W2", "b2"},
-                                    {"mul_tmp_1", "add_tmp_1", "hidden1"}, {}));
+  net.AddOp(f::OpRegistry::CreateOp(
+      "fc", {{"X", {"x"}}, {"W", {"W1"}}, {"b", {"b1"}}},
+      {{"mul_result", {"mul_tmp_0"}},
+       {"add_result", {"add_tmp_0"}},
+       {"Out", {"hidden0"}}},
+      {}));
+  net.AddOp(f::OpRegistry::CreateOp(
+      "fc", {{"X", {"hidden0"}}, {"W", {"W2"}}, {"b", {"b2"}}},
+      {{"mul_result", {"mul_tmp_1"}},
+       {"add_result", {"add_tmp_1"}},
+       {"Out", {"hidden1"}}},
+      {}));
   net.CompleteAddOp();
-  auto bwd = Backward(net, {"X"});  // X@GRAD is not need.
+  auto bwd = Backward(net, {"x"});  // x@GRAD is not need.
   ASSERT_TRUE(bwd->IsNetOp());
   auto bwd_net = static_cast<ops::NetOp *>(bwd.get());
-  std::unordered_set<std::string> all_output = std::unordered_set<std::string>(
-      bwd_net->outputs_.begin(), bwd_net->outputs_.end());
-  all_output.erase(f::kEmptyVarName);
+  auto output_vars = bwd_net->OutputVars(true);
+  std::unordered_set<std::string> all_outputs =
+      std::unordered_set<std::string>(output_vars.begin(), output_vars.end());
+  all_outputs.erase(f::kEmptyVarName);
 
   for (auto &out : {"W1", "b1", "hidden0", "W2", "b2"}) {
-    ASSERT_NE(all_output.find(f::GradVarName(out)), all_output.end());
+    ASSERT_NE(all_outputs.find(f::GradVarName(out)), all_outputs.end());
   }
 
   // Not Generated X
-  ASSERT_EQ(all_output.find(f::GradVarName("X")), all_output.end());
+  ASSERT_EQ(all_outputs.find(f::GradVarName("X")), all_outputs.end());
 
   ASSERT_EQ(2UL, bwd_net->ops_.size());
   ASSERT_TRUE(bwd_net->ops_[1]->IsNetOp());
   auto first_fc_grad = static_cast<ops::NetOp *>(bwd_net->ops_[1].get());
   ASSERT_EQ(3UL, first_fc_grad->ops_.size());
   ASSERT_EQ(f::kEmptyVarName,
-            first_fc_grad->ops_[2]->Output(f::GradVarName("A")));
+            first_fc_grad->ops_[2]->Output(f::GradVarName("X")));
 }
 
 TEST(Backward, net_shared_weight) {
   ops::NetOp net;
-  net.AddOp(f::OpRegistry::CreateOp("mul", {"X", "W"}, {"Out"}, {}));
-  net.AddOp(f::OpRegistry::CreateOp("mul", {"Out", "W"}, {"FinalOut"}, {}));
+  net.AddOp(f::OpRegistry::CreateOp("mul", {{"X", {"x"}}, {"Y", {"w"}}},
+                                    {{"Out", {"out"}}}, {}));
+  net.AddOp(f::OpRegistry::CreateOp("mul", {{"X", {"out"}}, {"Y", {"w"}}},
+                                    {{"Out", {"FinalOut"}}}, {}));
   net.CompleteAddOp();
 
   auto bwd = f::Backward(net, {});
@@ -280,31 +298,37 @@ TEST(Backward, net_shared_weight) {
 }
 
 TEST(Backward, op_register_grad_not_for_network) {
-  auto fwd = f::OpRegistry::CreateOp(
-      "fc", {"X", "W", "b"}, {"mul_out", "add_out", "out1"},
-      {{"temporary_index", std::vector<int>{0, 1}}});
+  auto fwd =
+      f::OpRegistry::CreateOp("fc", {{"X", {"x"}}, {"W", {"w"}}, {"b", {"b"}}},
+                              {{"mul_result", {"mul_out"}},
+                               {"add_result", {"add_out"}},
+                               {"Out", {"out1"}}},
+                              {{"temporary_index", std::vector<int>{0, 1}}});
 
   ASSERT_THROW(f::OpRegistry::CreateGradOp(*fwd), EnforceNotMet);
 }
 
 TEST(Backward, op_all_input_are_not_need) {
-  auto fwd = f::OpRegistry::CreateOp("rowwise_add", {"X", "b"}, {"Out"}, {});
-  auto backward = f::Backward(*fwd, {"X", "b"});
+  auto fwd = f::OpRegistry::CreateOp(
+      "rowwise_add", {{"X", {"x"}}, {"b", {"b"}}}, {{"Out", {"out"}}}, {});
+  auto backward = f::Backward(*fwd, {"x", "b"});
   ASSERT_TRUE(backward->IsNetOp());
   auto net = static_cast<ops::NetOp *>(backward.get());
   ASSERT_TRUE(net->ops_.empty());
 }
 
 TEST(Backward, op_all_output_are_not_need) {
-  auto fwd = f::OpRegistry::CreateOp("rowwise_add", {"X", "b"}, {"Out"}, {});
-  auto backward = f::Backward(*fwd, {"Out"});
+  auto fwd = f::OpRegistry::CreateOp(
+      "rowwise_add", {{"X", {"x"}}, {"b", {"b"}}}, {{"Out", {"out"}}}, {});
+  auto backward = f::Backward(*fwd, {"out"});
   ASSERT_TRUE(backward->IsNetOp());
   auto net = static_cast<ops::NetOp *>(backward.get());
   ASSERT_TRUE(net->ops_.empty());
 }
 
 TEST(Backward, op_part_of_output_are_not_need) {
-  auto fwd = f::OpRegistry::CreateOp("many_output_op", {"X"}, {"Y", "Z"}, {});
+  auto fwd = f::OpRegistry::CreateOp("many_output_op", {{"x", {"X"}}},
+                                     {{"y", {"Y"}}, {"z", {"Z"}}}, {});
   auto backward = f::Backward(*fwd, {"Z"});
   ASSERT_TRUE(backward->IsNetOp());
   auto net = static_cast<ops::NetOp *>(backward.get());
@@ -312,10 +336,10 @@ TEST(Backward, op_part_of_output_are_not_need) {
 
   auto &fill_zero = *net->ops_[0];
   ASSERT_EQ("fill_zeros_like", fill_zero.type_);
-  ASSERT_EQ(1UL, fill_zero.inputs_.size());
-  ASSERT_EQ("Z", fill_zero.inputs_[0]);
-  ASSERT_EQ(1UL, fill_zero.outputs_.size());
-  ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, fill_zero.outputs_[0]);
+  ASSERT_EQ(1UL, fill_zero.Inputs("Src").size());
+  ASSERT_EQ("Z", fill_zero.Input("Src"));
+  ASSERT_EQ(1UL, fill_zero.Outputs("Dst").size());
+  ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, fill_zero.Output("Dst"));
 
   auto &d_many_out = *net->ops_[1];
   ASSERT_EQ("many_output_op_grad", d_many_out.type_);
@@ -327,44 +351,62 @@ TEST(Backward, op_part_of_output_are_not_need) {
 }
 
 TEST(Backward, op_part_of_input_are_not_need) {
-  auto fwd = f::OpRegistry::CreateOp("mul", {"a", "b"}, {"out"}, {});
+  auto fwd = f::OpRegistry::CreateOp("mul", {{"X", {"a"}}, {"Y", {"b"}}},
+                                     {{"Out", {"out"}}}, {});
   auto backward = f::Backward(*fwd, {"a"});
   auto &grad_mul = *backward;
   ASSERT_EQ(grad_mul.type_, "mul_grad");
   ASSERT_EQ(grad_mul.inputs_.size(), 2UL + 1UL + 1UL);
   ASSERT_EQ(grad_mul.outputs_.size(), 2UL);
-  ASSERT_EQ(grad_mul.Output(f::GradVarName("A")), f::kEmptyVarName);
-  ASSERT_EQ(grad_mul.Output(f::GradVarName("B")), f::GradVarName("b"));
+  ASSERT_EQ(grad_mul.Output(f::GradVarName("X")), f::kEmptyVarName);
+  ASSERT_EQ(grad_mul.Output(f::GradVarName("Y")), f::GradVarName("b"));
   ASSERT_EQ(grad_mul.Input(f::GradVarName("Out")), f::GradVarName("out"));
-  ASSERT_EQ(grad_mul.Input("A"), "a");
-  ASSERT_EQ(grad_mul.Input("B"), "b");
+  ASSERT_EQ(grad_mul.Input("X"), "a");
+  ASSERT_EQ(grad_mul.Input("Y"), "b");
   ASSERT_EQ(grad_mul.Input("Out"), "out");
 }
 
 TEST(Backward, linear_net_intermediate_variable_has_no_grad) {
   ops::NetOp net;
-  net.AddOp(f::OpRegistry::CreateOp("fc", {"x1", "w1", "b1"},
-                                    {"mul_out1", "add_out1", "out1"}, {}));
-  net.AddOp(f::OpRegistry::CreateOp("fc", {"out1", "w2", "b2"},
-                                    {"mul_out2", "tmp_out2", "out2"}, {}));
-  net.AddOp(f::OpRegistry::CreateOp("fc", {"out2", "w3", "b3"},
-                                    {"mul_out3", "tmp_out3", "out3"}, {}));
+  net.AddOp(f::OpRegistry::CreateOp(
+      "fc", {{"X", {"x1"}}, {"W", {"w1"}}, {"b", {"b1"}}},
+      {{"mul_result", {"mul_out1"}},
+       {"add_result", {"add_out1"}},
+       {"Out", {"out1"}}},
+      {}));
+  net.AddOp(f::OpRegistry::CreateOp(
+      "fc", {{"X", {"out1"}}, {"W", {"w2"}}, {"b", {"b2"}}},
+      {{"mul_result", {"mul_out2"}},
+       {"add_result", {"tmp_out2"}},
+       {"Out", {"out2"}}},
+      {}));
+  net.AddOp(f::OpRegistry::CreateOp(
+      "fc", {{"X", {"out2"}}, {"W", {"w3"}}, {"b", {"b3"}}},
+      {{"mul_result", {"mul_out3"}},
+       {"add_result", {"tmp_out3"}},
+       {"Out", {"out3"}}},
+      {}));
   net.CompleteAddOp();
+
   auto backward = f::Backward(net, {"mul_out2", "tmp_out2", "out2"});
   ASSERT_TRUE(backward->IsNetOp());
   auto bwd_net = static_cast<ops::NetOp *>(backward.get());
   ASSERT_EQ(bwd_net->ops_.size(), 3UL);
   auto &grad_fc = *bwd_net->ops_[0];
-  EXPECT_EQ(grad_fc.inputs_.size(),
-            3UL /* external input number */
+
+  const char *all = paddle::operators::NetOp::kAll;
+  EXPECT_EQ(grad_fc.inputs_[all].size(),
+            2UL /* external input number */
                 + 1UL /* external output number*/
                 + 1UL /* number of gradient of external output*/
                 + 2U /* internal variable number*/);
-  EXPECT_EQ(grad_fc.outputs_.size(), 2UL /* input number of mul*/
-                                         + 2UL /* input number of rowwise_add */
-                                         + 1UL /* input number of sigmod */);
-  EXPECT_EQ(bwd_net->ops_[1]->inputs_.size(), 0UL);
-  EXPECT_EQ(bwd_net->ops_[1]->outputs_.size(), 0UL);
-  EXPECT_EQ(bwd_net->ops_[2]->inputs_.size(), 0UL);
-  EXPECT_EQ(bwd_net->ops_[2]->outputs_.size(), 0UL);
+  EXPECT_EQ(grad_fc.outputs_[all].size(),
+            2UL /* input number of mul*/
+                + 2UL /* input number of rowwise_add
+                       */
+                + 1UL /* input number of sigmod */);
+  EXPECT_EQ(bwd_net->ops_[1]->inputs_[all].size(), 0UL);
+  EXPECT_EQ(bwd_net->ops_[1]->outputs_[all].size(), 0UL);
+  EXPECT_EQ(bwd_net->ops_[2]->inputs_[all].size(), 0UL);
+  EXPECT_EQ(bwd_net->ops_[2]->outputs_[all].size(), 0UL);
 }
diff --git a/paddle/framework/ddim.cc b/paddle/framework/ddim.cc
index 545c1dcc2a1682839d90194002fdbb748d85e808..cfd3e8dfdec0e92620aef5cd246b4622b779ce19 100644
--- a/paddle/framework/ddim.cc
+++ b/paddle/framework/ddim.cc
@@ -283,6 +283,5 @@ std::ostream& operator<<(std::ostream& os, const DDim& ddim) {
 DDim::DDim(std::initializer_list<int> init_list) {
   *this = make_ddim(init_list);
 }
-
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/framework/details/lod_tensor.cc b/paddle/framework/details/lod_tensor.cc
deleted file mode 100644
index 9ad3979e5b511517f75d2d43004f97ee1576953b..0000000000000000000000000000000000000000
--- a/paddle/framework/details/lod_tensor.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License. */
-
-#include "paddle/framework/lod_tensor.h"
-
-#include
-
-namespace paddle {
-namespace framework {
-namespace details {
-
-using LOD = LODTensor::LOD;
-
-std::shared_ptr<LOD> SliceLOD(const LOD &lod, size_t level_begin,
-                              size_t level_end) {
-  auto new_lod = std::make_shared<LOD>();
-  new_lod->reserve(level_end - level_begin);
-  for (size_t i = level_begin; i < level_end; i++) {
-    new_lod->emplace_back(lod[i]);
-  }
-  return new_lod;
-}
-
-std::shared_ptr<LOD> SliceLOD(const LOD &lod, size_t level, size_t elem_begin,
-                              size_t elem_end, bool tensor_shared) {
-  // slice the lod.
-  auto new_lod = std::make_shared<LOD>();
-  new_lod->reserve(lod.size() - level);
-  auto start = lod.at(level)[elem_begin];
-  auto end = lod.at(level)[elem_end];
-
-  for (auto it = lod.begin() + level; it != lod.end(); it++) {
-    auto it_begin = std::find(it->begin(), it->end(), start);
-    auto it_end = std::find(it_begin, it->end(), end);
-    PADDLE_ENFORCE(it_begin != it->end(), "error in parsing lod info");
-    PADDLE_ENFORCE(it_end != it->end(), "error in parsing lod info");
-    new_lod->emplace_back(it_begin, it_end + 1);
-    if (!tensor_shared) {
-      // reset offset if tensor is copyed and sliced.
-      std::transform(new_lod->back().begin(), new_lod->back().end(),
-                     new_lod->back().begin(),
-                     [start](int v) { return v - start; });
-      PADDLE_ENFORCE(new_lod->back().front() == 0, "error in slice LOD");
-    }
-  }
-  return new_lod;
-}
-
-}  // namespace details
-}  // namespace framework
-}  // namespace paddle
diff --git a/paddle/framework/details/lod_tensor.h b/paddle/framework/details/lod_tensor.h
deleted file mode 100644
index 9a6a6cd2ea41f02db991bdc0a2b917433dafed99..0000000000000000000000000000000000000000
--- a/paddle/framework/details/lod_tensor.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License. */
-
-#pragma once
-
-#include
-
-namespace paddle {
-namespace framework {
-namespace details {
-
-/*
- * Slice levels from LOD.
- *
- * @lod: LOD to slice.
- * @level_begin: level to begin slice.
- * @level_end: level to end slice.
- */
-std::shared_ptr<LODTensor::LOD> SliceLOD(const LODTensor::LOD &lod,
-                                         size_t level_begin, size_t level_end);
-
-/*
- * Slice elements from a level of LOD.
- *
- * @lod: LOD to slice.
- * @level: which level to slice.
- * @elem_begin: element's index to begin slice.
- * @elem_end: element's index to end slice.
- */
-std::shared_ptr<LODTensor::LOD> SliceLOD(const LODTensor::LOD &lod,
                                         size_t level, size_t elem_begin,
-                                         size_t elem_end, bool tensor_shared);
-}  // namespace details
-}  // namespace framework
-}  // namespace paddle
diff --git a/paddle/framework/framework.proto b/paddle/framework/framework.proto
new file mode 100644
index 0000000000000000000000000000000000000000..7077e8aa2c77c24efdbb34ed3a13821fe7678455
--- /dev/null
+++ b/paddle/framework/framework.proto
@@ -0,0 +1,82 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+syntax = "proto2";
+package paddle.framework;
+
+enum AttrType {
+  INT = 0;
+  FLOAT = 1;
+  STRING = 2;
+  INTS = 3;
+  FLOATS = 4;
+  STRINGS = 5;
+}
+
+// OpDesc describes an instance of a C++ framework::OperatorBase
+// derived class type.
+message OpDesc {
+
+  message Attr {
+    required string name = 1;
+    required AttrType type = 2;
+    optional int32 i = 3;
+    optional float f = 4;
+    optional string s = 5;
+    repeated int32 ints = 6;
+    repeated float floats = 7;
+    repeated string strings = 8;
+  };
+
+  message Var {
+    required string parameter = 1;
+    repeated string arguments = 2;
+  };
+
+  required string type = 3;
+  repeated Var inputs = 1;
+  repeated Var outputs = 2;
+  repeated Attr attrs = 4;
+};
+
+// OpProto describes a C++ framework::OperatorBase derived class.
+message OpProto {
+
+  // VarProto describes the C++ type framework::Variable.
+  message Var {
+    required string name = 1;
+    required string comment = 2;
+
+    optional bool duplicable = 3 [ default = false ];
+    optional bool intermediate = 4 [ default = false ];
+    optional bool no_gradient = 5 [ default = false ];
+  }
+
+  // AttrProto describes the C++ type Attribute.
+  message Attr {
+    required string name = 1;
+    required AttrType type = 2;
+    required string comment = 3;
+    // If that attribute is generated, it means the Paddle third
+    // language binding has responsibility to fill that
+    // attribute. End-User should not set that attribute.
+    optional bool generated = 4 [ default = false ];
+  }
+
+  required string type = 1;
+  repeated Var inputs = 2;
+  repeated Var outputs = 3;
+  repeated Attr attrs = 4;
+  required string comment = 5;
+}
diff --git a/paddle/framework/grad_op_builder.cc b/paddle/framework/grad_op_builder.cc
index 8bd2bc590272256fed79f4ab38ad52b470e87012..21bc30d1fbdae31548547bccf39e78fe16eedfaa 100644
--- a/paddle/framework/grad_op_builder.cc
+++ b/paddle/framework/grad_op_builder.cc
@@ -13,105 +13,52 @@ express or implied. See the License for the specific language governing
 permissions and limitations under the License. */
 
 #include "paddle/framework/grad_op_builder.h"
-#include "paddle/framework/op_proto.pb.h"
+#include "paddle/framework/framework.pb.h"
 #include "paddle/framework/op_registry.h"
 
 namespace paddle {
 namespace framework {
-
-typedef std::vector<int> Ints;
-
 enum class OpArgType { IN, OUT };
 
-const Ints* AttrFormat(const AttributeMap& attrs, const std::string& key) {
-  return (attrs.count(key) > 0) ? &boost::get<Ints>(attrs.at(key)) : nullptr;
-}
-
-Ints* AttrFormat(AttributeMap& attrs, const std::string& key) {
-  return (attrs.count(key) > 0) ? &boost::get<Ints>(attrs.at(key)) : nullptr;
-}
-
 static void TransOpArg(const OperatorBase* src_op,
-                       std::vector<std::string>& grad_inputs,
-                       std::vector<std::string>& grad_outputs,
-                       AttributeMap& grad_attrs,
-                       std::unordered_map<std::string, int>& grad_idxs,
-                       const std::string& src_type, const std::string& dst_type,
-                       int& idx, bool is_grad) {
-  const std::vector<std::string>& src_inout =
-      (src_type == "input_format") ? src_op->inputs_ : src_op->outputs_;
-
-  const std::vector<int>* src_format = AttrFormat(src_op->Attrs(), src_type);
-
-  std::vector<std::string>& dst_inout =
-      (dst_type == "input_format") ? grad_inputs : grad_outputs;
-
-  std::vector<int>* dst_format = AttrFormat(grad_attrs, dst_type);
-
-  const OpProto& proto = OpRegistry::protos().at(src_op->type_);
+                       OperatorBase::VarNameMap* vars,
+                       const OpArgType& src_type, bool is_grad) {
+  const auto& src_inout =
+      src_type == OpArgType::IN ? src_op->inputs_ : src_op->outputs_;
+  auto& dst_inout = *vars;
+  const OpProto& proto = OpProtos().at(src_op->type_);
   const auto& src_arg_list =
-      (src_type == "input_format") ? proto.inputs() : proto.outputs();
-
+      src_type == OpArgType::IN ? proto.inputs() : proto.outputs();
   for (const auto& arg : src_arg_list) {
-    std::string src_name = arg.name();
-    std::string dst_name = is_grad ? src_name + kGradVarSuffix : src_name;
-    grad_idxs[dst_name] = idx++;
-    int src_arg_idx = src_op->in_out_idxs_->at(src_name);
-    int src_begin =
-        src_format == nullptr ? src_arg_idx : src_format->at(src_arg_idx);
-    int src_end = src_format == nullptr ? src_arg_idx + 1
-                                        : src_format->at(src_arg_idx + 1);
-    for (int i = src_begin; i < src_end; ++i) {
-      std::string s =
-          is_grad ? src_inout[i] + kGradVarSuffix
-                  : (arg.ignore_gradient() ? kEmptyVarName : src_inout[i]);
-      dst_inout.emplace_back(s);
-    }
-    if (dst_format != nullptr) {
-      dst_format->push_back(dst_inout.size());
+    if (arg.no_gradient() && !is_grad) continue;
+    const std::string src_name = arg.name();
+    std::string dst_name = is_grad ? GradVarName(src_name) : src_name;
+    dst_inout[dst_name].reserve(src_inout.at(src_name).size());
+    for (auto& var_name : src_inout.at(src_name)) {
+      std::string s = is_grad ? GradVarName(var_name) : var_name;
+      dst_inout[dst_name].emplace_back(s);
     }
   }
 }
 
 OperatorBase* BuildGradOp(const OperatorBase* op) {
-  const std::string& grad_op_type = OpRegistry::grad_ops().at(op->Type());
-
-  AttributeMap grad_attrs(op->Attrs());
-  grad_attrs.erase("input_format");
-  grad_attrs.erase("output_format");
-  if (op->Attrs().count("input_format") > 0) {
-    grad_attrs["output_format"] = std::vector<int>({0});
-  }
-  if (op->Attrs().count("input_format") > 0 ||
-      op->Attrs().count("output_format") > 0) {
-    grad_attrs["input_format"] = std::vector<int>({0});
-  }
-
-  std::vector<std::string> grad_inputs, grad_outputs;
-
-  using VarIndexMap = std::unordered_map<std::string, int>;
-  VarIndexMap* grad_idxs = new VarIndexMap;
-  int in_idx = 0;
-  int out_idx = 0;
-  TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, *grad_idxs,
-             "input_format", "input_format", in_idx, false);   // I
-  TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, *grad_idxs,
-             "output_format", "input_format", in_idx, false);  // G
-  TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, *grad_idxs,
-             "output_format", "input_format", in_idx, true);   // OG
-  TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, *grad_idxs,
-             "input_format", "output_format", out_idx, true);  // IG
-
-  OperatorBase* grad_op = OpRegistry::op_creators().at(grad_op_type)();
-
-  grad_op->type_ = grad_op_type;
-  grad_op->inputs_ = grad_inputs;
-  grad_op->outputs_ = grad_outputs;
-  grad_op->attrs_ = grad_attrs;
-  grad_op->in_out_idxs_.reset(grad_idxs);
-
-  return grad_op;
+  auto gop_type_it = OpRegistry::grad_ops().find(op->type_);
+  PADDLE_ENFORCE(gop_type_it != OpRegistry::grad_ops().end(),
+                 "Operator %s do not register gradient type", op->type_);
+  auto& grad_op_type = gop_type_it->second;
+  OperatorBase::VarNameMap inputs;
+  OperatorBase::VarNameMap outputs;
+  TransOpArg(op, &inputs, OpArgType::IN, false);   // I
+  TransOpArg(op, &inputs, OpArgType::OUT, false);  // O
+  TransOpArg(op, &inputs, OpArgType::OUT, true);   // OG
+  TransOpArg(op, &outputs, OpArgType::IN, true);   // IG
+  auto gop_it = OpRegistry::op_creators().find(grad_op_type);
+  PADDLE_ENFORCE(gop_it != OpRegistry::op_creators().end(),
+                 "Operator %s 's Gradient %s's creator cannot be found",
+                 op->type_, grad_op_type);
+
+  return gop_it->second(grad_op_type, inputs, outputs, op->attrs_);
 }
 
 }  // namespace framework
diff --git a/paddle/framework/grad_op_builder_test.cc b/paddle/framework/grad_op_builder_test.cc
index 19e552b7458c966d473bdee99515a2beee1f6089..ebaf84545fce0d281d8821861264cddc8854893d 100644
--- a/paddle/framework/grad_op_builder_test.cc
+++ b/paddle/framework/grad_op_builder_test.cc
@@ -10,8 +10,7 @@ namespace framework {
 
 class NOP : public OperatorBase {
  public:
-  DEFINE_OPERATOR_CTOR(NOP, OperatorBase)
-
+  using OperatorBase::OperatorBase;
   void InferShape(const Scope &scope) const override {}
   void Run(const Scope &scope,
            const platform::DeviceContext &dev_ctx) const override {}
@@ -22,10 +21,10 @@ class MutiInOutOpMaker : public OpProtoAndCheckerMaker {
   MutiInOutOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("In1", "a single input");
-    AddInput("In2_mult", "a multiple input").SetMultiple();
+    AddInput("In2_mult", "a multiple input").AsDuplicable();
     AddInput("In3", "another single input");
     AddOutput("Out1", "a single output");
-    AddOutput("Out2_mult", "a multiple output").SetMultiple();
+    AddOutput("Out2_mult", "a multiple output").AsDuplicable();
     AddComment("test op with multiple inputs and outputs");
   }
 };
@@ -35,10 +34,10 @@ class IOIgnoredOpMaker : public OpProtoAndCheckerMaker {
   IOIgnoredOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("In1", "a single input");
-    AddInput("In2_mult", "a multiple input").SetMultiple().IgnoreGradient();
-    AddInput("In3_mult", "another multiple input").SetMultiple();
-    AddOutput("Out1_mult", "a multiple output").SetMultiple();
-    AddOutput("Out2", "a single output").IgnoreGradient();
+    AddInput("In2_mult", "a multiple input").AsDuplicable().AsNoGradient();
+    AddInput("In3_mult", "another multiple input").AsDuplicable();
+    AddOutput("Out1_mult", "a multiple output").AsDuplicable();
+    AddOutput("Out2", "a single output").AsNoGradient();
     AddComment("op with inputs and outputs ignored in gradient calculating");
   }
 };
@@ -49,18 +48,18 @@ class IOIgnoredOpMaker : public OpProtoAndCheckerMaker {
 namespace f = paddle::framework;
 
 TEST(GradOpBuilder, AddTwo) {
-  std::shared_ptr<f::OperatorBase> add_op(
-      f::OpRegistry::CreateOp("add_two", {"x", "y"}, {"out"}, {}));
+  std::shared_ptr<f::OperatorBase> add_op(f::OpRegistry::CreateOp(
+      "add_two", {{"X", {"x"}}, {"Y", {"y"}}}, {{"Out", {"out"}}}, {}));
   std::shared_ptr<f::OperatorBase> grad_add_op =
       f::OpRegistry::CreateGradOp(*add_op);
-  EXPECT_EQ(static_cast<int>(grad_add_op->inputs_.size()), 4);
-  EXPECT_EQ(static_cast<int>(grad_add_op->outputs_.size()), 2);
+  EXPECT_EQ(grad_add_op->inputs_.size(), 4UL);
+  EXPECT_EQ(grad_add_op->outputs_.size(), 2UL);
   EXPECT_EQ(grad_add_op->Input("X"), "x");
   EXPECT_EQ(grad_add_op->Input("Y"), "y");
   EXPECT_EQ(grad_add_op->Input("Out"), "out");
-  EXPECT_EQ(grad_add_op->Input("Out@GRAD"), "out@GRAD");
-  EXPECT_EQ(grad_add_op->Output("X@GRAD"), "x@GRAD");
-  EXPECT_EQ(grad_add_op->Output("Y@GRAD"), "y@GRAD");
+  EXPECT_EQ(grad_add_op->Input(f::GradVarName("Out")), f::GradVarName("out"));
+  EXPECT_EQ(grad_add_op->Output(f::GradVarName("X")), f::GradVarName("x"));
+  EXPECT_EQ(grad_add_op->Output(f::GradVarName("Y")), f::GradVarName("y"));
 }
 
 REGISTER_OP(mult_io, f::NOP, f::MutiInOutOpMaker);
@@ -69,15 +68,15 @@ REGISTER_OP(io_ignored, f::NOP, f::IOIgnoredOpMaker);
 REGISTER_GRADIENT_OP(io_ignored, io_ignored_grad, f::NOP);
 
 TEST(GradOpBuilder, MutiInOut) {
-  f::AttributeMap attrs{{"input_format", std::vector<int>{0, 1, 4, 5}},
-                        {"output_format", std::vector<int>{0, 1, 3}}};
   std::shared_ptr<f::OperatorBase> test_op(f::OpRegistry::CreateOp(
-      "mult_io", {"in1", "in2_1", "in2_2", "in2_3", "in3"},
-      {"out1", "out2_1", "out2_2"}, attrs));
+      "mult_io", {{"In1", {"in1"}},
+                  {"In2_mult", {"in2_1", "in2_2", "in2_3"}},
+                  {"In3", {"in3"}}},
+      {{"Out1", {"out1"}}, {"Out2_mult", {"out2_1", "out2_2"}}}, {}));
   std::shared_ptr<f::OperatorBase> grad_test_op =
       f::OpRegistry::CreateGradOp(*test_op);
 
-  ASSERT_EQ(grad_test_op->inputs_.size(), 5UL + 3UL + 3UL);
+  ASSERT_EQ(grad_test_op->inputs_.size(), 3UL + 2UL + 2UL);
   EXPECT_EQ(grad_test_op->Input("In1"), "in1");
   EXPECT_EQ(grad_test_op->Inputs("In2_mult"),
             std::vector<std::string>({"in2_1", "in2_2", "in2_3"}));
@@ -91,7 +90,7 @@ TEST(GradOpBuilder, MutiInOut) {
             std::vector<std::string>(
                 {f::GradVarName("out2_1"), f::GradVarName("out2_2")}));
 
-  ASSERT_EQ(grad_test_op->outputs_.size(), 5UL);
+  ASSERT_EQ(grad_test_op->outputs_.size(), 3UL);
   EXPECT_EQ(grad_test_op->Output(f::GradVarName("In1")), f::GradVarName("in1"));
   EXPECT_EQ(grad_test_op->Outputs(f::GradVarName("In2_mult")),
             std::vector<std::string>({f::GradVarName("in2_1"),
@@ -101,31 +100,28 @@ TEST(GradOpBuilder, MutiInOut) {
 }
 
 TEST(GradOpBuilder, IOIgnoredInGradient) {
-  f::AttributeMap attrs{{"input_format", std::vector<int>{0, 1, 3, 5}},
-                        {"output_format", std::vector<int>{0, 2, 3}}};
   std::shared_ptr<f::OperatorBase> test_op(f::OpRegistry::CreateOp(
-      "io_ignored", {"in1", "in2_1", "in2_2", "in3_1", "in3_2"},
{"in1", "in2_1", "in2_2", "in3_1", "in3_2"}, - {"out1_1", "out1_2", "out2"}, attrs)); + "io_ignored", {{"In1", {"in1"}}, + {"In2_mult", {"in2_1", "in2_2"}}, + {"In3_mult", {"in3_1", "in3_2"}}}, + {{"Out1_mult", {"out1_1", "out1_2"}}, {"Out2", {"out2"}}}, {})); std::shared_ptr grad_test_op = f::OpRegistry::CreateGradOp(*test_op); // 'In2' and 'Out2' are ignored in gradient calculating - ASSERT_EQ(grad_test_op->inputs_.size(), 5UL + 3UL + 3UL); + ASSERT_EQ(grad_test_op->inputs_.size(), 2UL + 1UL + 2UL); EXPECT_EQ(grad_test_op->Input("In1"), "in1"); - EXPECT_EQ(grad_test_op->Inputs("In2_mult"), - std::vector({f::kEmptyVarName, f::kEmptyVarName})); EXPECT_EQ(grad_test_op->Inputs("In3_mult"), std::vector({"in3_1", "in3_2"})); EXPECT_EQ(grad_test_op->Inputs("Out1_mult"), std::vector({"out1_1", "out1_2"})); - EXPECT_EQ(grad_test_op->Input("Out2"), f::kEmptyVarName); EXPECT_EQ(grad_test_op->Inputs(f::GradVarName("Out1_mult")), std::vector( {f::GradVarName("out1_1"), f::GradVarName("out1_2")})); EXPECT_EQ(grad_test_op->Input(f::GradVarName("Out2")), f::GradVarName("out2")); - ASSERT_EQ(grad_test_op->outputs_.size(), 5UL); + ASSERT_EQ(grad_test_op->outputs_.size(), 3UL); EXPECT_EQ(grad_test_op->Output(f::GradVarName("In1")), f::GradVarName("in1")); EXPECT_EQ(grad_test_op->Outputs(f::GradVarName("In2_mult")), std::vector( diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc index 70045dbf7afd0935e4df852b2f0e3ecd163a9316..2b178907747b3911292b070b65160a24c120b726 100644 --- a/paddle/framework/lod_tensor.cc +++ b/paddle/framework/lod_tensor.cc @@ -19,32 +19,59 @@ namespace paddle { namespace framework { -LODTensor LODTensor::SliceShared(size_t level_begin, size_t level_end) const { - PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced."); - auto new_lod = details::SliceLOD(*lod_start_pos_, level_begin, level_end); - // slice levels just need to update LOD info, each level will contains the - // whole tensor_, so no need to modify tensor_. - return LODTensor(tensor_, new_lod); +LODTensor::LOD LODTensor::LOD::SliceLevels(size_t level_begin, + size_t level_end) const { + LOD new_lod; + new_lod.reserve(level_end - level_begin); + for (size_t i = level_begin; i < level_end; i++) { + new_lod.emplace_back(at(i)); + } + return new_lod; } -LODTensor LODTensor::SliceShared(size_t level, size_t elem_begin, - size_t elem_end) const { - PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced."); - PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level, - NumLevels()); - PADDLE_ENFORCE(elem_begin < NumElements(level), - "element begin [%d] out of range [%d]", elem_begin, - NumElements(level)); - PADDLE_ENFORCE(elem_end < NumElements(level) + 1, - "element end [%d] out of range [%d]", elem_end, - NumElements(level)); - - auto new_lod = details::SliceLOD(*lod_start_pos_, level, elem_begin, elem_end, - true /*tensor_shared*/); - - // slice elements just need to update LOD info, because offsets are not - // changed, so the original tensor_ can be reused. - return LODTensor(tensor_, new_lod); +LODTensor::LOD LODTensor::LOD::SliceInLevel(size_t level, size_t elem_begin, + size_t elem_end) const { + // slice the lod. 
+  LOD new_lod;
+  new_lod.reserve(size() - level);
+  auto start = this->at(level)[elem_begin];
+  auto end = this->at(level)[elem_end];
+
+  for (auto it = this->begin() + level; it != this->end(); it++) {
+    auto it_begin = std::find(it->begin(), it->end(), start);
+    auto it_end = std::find(it_begin, it->end(), end);
+    PADDLE_ENFORCE(it_begin != it->end(), "error in parsing lod info");
+    PADDLE_ENFORCE(it_end != it->end(), "error in parsing lod info");
+    new_lod.emplace_back(it_begin, it_end + 1);
+    // reset offset if tensor is copyed and sliced.
+    std::transform(new_lod.back().begin(), new_lod.back().end(),
+                   new_lod.back().begin(),
+                   [start](int v) { return v - start; });
+    PADDLE_ENFORCE_EQ(new_lod.back().front(), 0, "error in slice LOD");
+  }
+  PADDLE_ENFORCE_LE(new_lod.size(), this->size());
+  return new_lod;
+}
+
+bool operator==(const LODTensor::LOD& a, const LODTensor::LOD& b) {
+  if (a.size() != b.size()) {
+    return false;
+  }
+
+  for (size_t i = 0; i < a.size(); i++) {
+    const auto& a_level = a[i];
+    const auto& b_level = b[i];
+    if (a_level.size() != b_level.size()) {
+      return false;
+    }
+    for (size_t j = 0; j < a_level.size(); j++) {
+      if (a_level[j] != b_level[j]) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
 
 }  // namespace framework
diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h
index 4933479b109694312e99595dc8ad6db70259efa6..9e27aec38d336db8a4f0adbed098d299aa741356 100644
--- a/paddle/framework/lod_tensor.h
+++ b/paddle/framework/lod_tensor.h
@@ -15,7 +15,7 @@
 #pragma once
 
 #include
-#if (!PADDLE_ONLY_CPU)
+#if !defined(PADDLE_ONLY_CPU)
 #include
 #include
 #endif
@@ -31,30 +31,29 @@ namespace framework {
  * LODTensor (Level of details Tensor)
  * see https://en.wikipedia.org/wiki/Level_of_details for reference.
  */
-class LODTensor {
+class LODTensor : public Tensor {
  public:
// Level save offsets of each unit.
#ifdef PADDLE_ONLY_CPU
-  using Level = std::vector<size_t>;
+  template <typename T>
+  using Vector = std::vector<T>;
#else
-  using Level = thrust::device_vector<size_t>;
+  template <typename T>
+  using Vector = thrust::host_vector<T>;
#endif
-  // LOD stores offsets of each level of units, the largest units level first,
+  // LoD stores offsets of each level of units, the largest units level first,
   // then the smaller units level. Each Level stores the offsets of units in
   // Tesor.
-  typedef std::vector<Level> LOD;
+  class LOD : public std::vector<Vector<size_t>> {
+   public:
+    LOD SliceLevels(size_t level_begin, size_t level_end) const;
+    LOD SliceInLevel(size_t level, size_t elem_begin, size_t elem_end) const;
+  };
 
   LODTensor() {}
-  LODTensor(const std::shared_ptr<Tensor> &tensor,
-            const std::shared_ptr<LOD> &lod) {
-    Reset(tensor, lod);
-  }
+  explicit LODTensor(const LOD &lod) : lod_(lod) {}
 
-  void Reset(const std::shared_ptr<Tensor> &tensor,
-             const std::shared_ptr<LOD> &lod) {
-    tensor_ = tensor;
-    lod_start_pos_ = lod;
-  }
+  virtual Tensor *Clone() const { return new LODTensor(lod_); }
 
   /*
    * Get a element from LOD.
@@ -65,16 +64,14 @@ class LODTensor {
     PADDLE_ENFORCE(elem < NumElements(level),
                    "element begin [%d] out of range [%d]", elem,
                    NumElements(level));
-    return (*lod_start_pos_)[level][elem];
+    return (lod_)[level][elem];
   }
 
   /*
    * Number of LODTensor's levels, each level has units of data, for example,
   * in the sentence's view, article, paragraph, sentence are 3 levels.
    */
-  size_t NumLevels() const {
-    return lod_start_pos_ ? lod_start_pos_->size() : 0UL;
-  }
+  size_t NumLevels() const { return lod_.size(); }
 
   /*
    * Number of elements in a level.
   */
@@ -82,64 +79,71 @@ class LODTensor {
     PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
                    NumLevels());
     // the last offset is the end of last element
-    return lod_start_pos_->at(level).size() - 1;
+    return lod_[level].size() - 1;
   }
 
-  /*
-   * Slice of levels[level_begin:level_end], with tensor copied.
-   */
-  template <typename T>
-  LODTensor SliceCopied(size_t level_begin, size_t level_end,
-                        const platform::Place &dst_place) const;
-
   /*
    * Slice of levels[level_begin:level_end], with tensor shared.
    */
-  LODTensor SliceShared(size_t level_begin, size_t level_end) const;
-
-  /*
-   * Slice of elements of a level, [elem_begin: elem_end], with tensor copied.
-   * @note: low performance in slice lod_start_pos_.
-   */
   template <typename T>
-  LODTensor SliceCopied(size_t level, size_t elem_begin, size_t elem_end,
-                        const platform::Place &dst_place) const;
+  LODTensor SliceLevels(size_t level_begin, size_t level_end) const;
 
   /*
    * Slice of elements of a level, [elem_begin: elem_end], with tensor shared.
-   * @note: low performance in slice lod_start_pos_.
-   */
-  LODTensor SliceShared(size_t level, size_t elem_begin, size_t elem_end) const;
-
-  /*
-   * Copy other's lod_start_pos_, to share LOD info.
-   * @note: the LOD info should not be changed.
+   * @note: low performance in slice lod_.
   */
-  void ShareLOD(const LODTensor &other) {
-    lod_start_pos_ = other.lod_start_pos_;
-  }
+  template <typename T>
+  LODTensor SliceInLevel(size_t level, size_t elem_begin,
+                         size_t elem_end) const;
 
   /*
-   * Copy other's lod_start_pos_'s content, free to mutate.
+   * Copy other's lod_'s content, free to mutate.
   */
-  void CopyLOD(const LODTensor &other) {
-    lod_start_pos_ = std::make_shared<LOD>(*other.lod_start_pos_);
-  }
+  void CopyLOD(const LODTensor &other) { lod_ = other.lod_; }
   /*
    * Determine whether LODTensor has a valid LOD info.
    */
-  bool HasLOD() const { return bool(lod_start_pos_); }
-  LOD *lod() const { return lod_start_pos_.get(); }
+  const LOD &lod() const { return lod_; }
+  LOD *mutable_lod() { return &lod_; }
 
-  std::shared_ptr<Tensor> &tensor() { return tensor_; }
-  Tensor *raw_tensor() { return tensor_.get(); }
+  virtual ~LODTensor() {}
 
 private:
-  std::shared_ptr<LOD> lod_start_pos_;
-  std::shared_ptr<Tensor> tensor_;
+  LOD lod_;
 };
 
+bool operator==(const LODTensor::LOD &a, const LODTensor::LOD &b);
+
+template <typename T>
+LODTensor LODTensor::SliceLevels(size_t level_begin, size_t level_end) const {
+  auto new_lod = lod_.SliceLevels(level_begin, level_end);
+  // slice levels just need to update LOD info, each level will contains the
+  // whole tensor_, so no need to modify tensor_.
+  LODTensor new_tensor(new_lod);
+  new_tensor.ShareDataWith<T>(*this);
+  return new_tensor;
+}
+
+template <typename T>
+LODTensor LODTensor::SliceInLevel(size_t level, size_t elem_begin,
+                                  size_t elem_end) const {
+  PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
+                 NumLevels());
+  PADDLE_ENFORCE(elem_begin < NumElements(level),
+                 "element begin [%d] out of range [%d]", elem_begin,
+                 NumElements(level));
+  PADDLE_ENFORCE(elem_end < NumElements(level) + 1,
+                 "element end [%d] out of range [%d]", elem_end,
+                 NumElements(level));
+
+  auto new_lod = lod_.SliceInLevel(level, elem_begin, elem_end);
+
+  // slice elements just need to update LOD info, because offsets are not
+  // changed, so the original tensor_ can be reused.
+  LODTensor new_tensor(new_lod);
+  new_tensor.ShareDataWith<T>(*this);
+  return new_tensor;
+}
+
 }  // namespace framework
 }  // namespace paddle
-
-#include "paddle/framework/lod_tensor_impl.h"
diff --git a/paddle/framework/lod_tensor_impl.h b/paddle/framework/lod_tensor_impl.h
deleted file mode 100644
index 0eb6469aea3ae25f035751da985b5bebb489d961..0000000000000000000000000000000000000000
--- a/paddle/framework/lod_tensor_impl.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License. */
-
-#pragma once
-
-#include "paddle/framework/details/lod_tensor.h"
-
-namespace paddle {
-namespace framework {
-
-template <typename T>
-LODTensor LODTensor::SliceCopied(size_t level_begin, size_t level_end,
-                                 const platform::Place &dst_place) const {
-  PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced.");
-  auto new_lod = details::SliceLOD(*lod_start_pos_, level_begin, level_end);
-  auto new_tensor = std::make_shared<Tensor>();
-  new_tensor->CopyFrom<T>(*tensor_, dst_place);
-
-  return LODTensor(new_tensor, new_lod);
-}
-
-template <typename T>
-LODTensor LODTensor::SliceCopied(size_t level, size_t elem_begin,
-                                 size_t elem_end,
-                                 const platform::Place &dst_place) const {
-  PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced.");
-  PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
-                 NumLevels());
-  PADDLE_ENFORCE(elem_begin < NumElements(level),
-                 "element begin [%d] out of range [%d]", elem_begin,
-                 NumElements(level));
-  PADDLE_ENFORCE(elem_end < NumElements(level) + 1,
-                 "element end [%d] out of range [%d]", elem_end,
-                 NumElements(level));
-
-  auto new_lod = details::SliceLOD(*lod_start_pos_, level, elem_begin, elem_end,
-                                   false /*tensor_shared*/);
-
-  auto start_idx = new_lod->front().front();
-  auto end_idx = new_lod->front().back() - 1 /*the next element's start*/;
-  auto sliced_tensor = tensor_->Slice<T>(start_idx, end_idx);
-  auto new_tensor = std::make_shared<Tensor>();
-  new_tensor->CopyFrom<T>(sliced_tensor, dst_place);
-
-  return LODTensor(new_tensor, new_lod);
-}
-
-}  // namespace framework
-}  // namespace paddle
diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc
index 511716375e81e8fd89b071c940ee97327c268b8b..2881136ced6ef957a192e303e529b9b2867b3dda 100644
--- a/paddle/framework/lod_tensor_test.cc
+++ b/paddle/framework/lod_tensor_test.cc
@@ -15,6 +15,7 @@
 #include
 
 #include
+#include
 #include
 
 namespace paddle {
@@ -29,22 +30,28 @@ class LODTensorTester : public ::testing::Test {
     // 0 10 20
    // 0 5 10 15 20
     // 0 2 5 7 10 12 15 20
-    auto lod = std::make_shared<LODTensor::LOD>();
-    lod->push_back(std::vector<size_t>{0, 10, 20});
-    lod->push_back(std::vector<size_t>{0, 5, 10, 15, 20});
-    lod->push_back(std::vector<size_t>{0, 2, 5, 7, 10, 12, 15, 17, 20});
+    LODTensor::LOD lod;
+    lod.push_back(std::vector<size_t>{0, 10, 20});
+    lod.push_back(std::vector<size_t>{0, 5, 10, 15, 20});
+    lod.push_back(std::vector<size_t>{0, 2, 5, 7, 10, 12, 15, 17, 20});
 
-    auto tensor = std::make_shared<Tensor>();
-    tensor->Resize({20 /*batch size*/, 128 /*dim*/});
+    ASSERT_EQ(lod.size(), 3UL);
+
+    tensor.Resize({20 /*batch size*/, 128 /*dim*/});
     // malloc memory
-    tensor->mutable_data<float>(place);
+    tensor.mutable_data<float>(place);
+
+    lod_tensor.reset(new LODTensor(lod));
+    lod_tensor->Resize({20 /*batch size*/, 128 /*dim*/});
 
-    lod_tensor->Reset(tensor, lod);
+
+    lod_tensor->ShareDataWith<float>(tensor);
+    // lod_tensor->ShareDataWith<float>(tensor);
   }
 
  protected:
   std::unique_ptr<LODTensor> lod_tensor;
   platform::CPUPlace place;
+  Tensor tensor;
 };
 
 TEST_F(LODTensorTester, NumLevels) { ASSERT_EQ(lod_tensor->NumLevels(), 3UL); }
@@ -55,110 +62,54 @@ TEST_F(LODTensorTester, NumElements) {
   ASSERT_EQ(lod_tensor->NumElements(2), 8UL);
 }
 
-TEST_F(LODTensorTester, SliceShared_Level) {
-  // slice 1 level
-  for (size_t level = 0; level < 3UL; ++level) {
-    auto new_lod_tensor = lod_tensor->SliceShared(level, level + 1);
-    ASSERT_EQ(new_lod_tensor.NumLevels(), 1UL);
-    ASSERT_EQ(new_lod_tensor.NumElements(0UL), lod_tensor->NumElements(level));
-    ASSERT_EQ(new_lod_tensor.tensor(), lod_tensor->tensor());
-  }
-  // slice 2 level
-  for (size_t level = 0; level < 2UL; ++level) {
-    auto new_lod_tensor = lod_tensor->SliceShared(level, level + 2);
-    ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL);
-    ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor->NumElements(level));
-    ASSERT_EQ(new_lod_tensor.NumElements(1),
-              lod_tensor->NumElements(level + 1));
-    ASSERT_EQ(new_lod_tensor.tensor(), lod_tensor->tensor());
-  }
-}
-
-TEST_F(LODTensorTester, SliceCopied_Level) {
+TEST_F(LODTensorTester, SliceLevels) {
   // slice 1 level
   for (size_t level = 0; level < 3UL; ++level) {
-    auto new_lod_tensor =
-        lod_tensor->SliceCopied<float>(level, level + 1, place);
+    auto new_lod_tensor = lod_tensor->SliceLevels<float>(level, level + 1);
     ASSERT_EQ(new_lod_tensor.NumLevels(), 1UL);
     ASSERT_EQ(new_lod_tensor.NumElements(0UL), lod_tensor->NumElements(level));
-    // ASSERT_EQ(new_lod_tensor.tensor(), lod_tensor->tensor());
-    // TODO(superjom) add tensor comparation here.
+    // ASSERT_EQ(new_lod_tensor, *lod_tensor);
   }
   // slice 2 level
   for (size_t level = 0; level < 2UL; ++level) {
-    auto new_lod_tensor =
-        lod_tensor->SliceCopied<float>(level, level + 2, place);
+    auto new_lod_tensor = lod_tensor->SliceLevels<float>(level, level + 2);
     ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL);
     ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor->NumElements(level));
     ASSERT_EQ(new_lod_tensor.NumElements(1),
               lod_tensor->NumElements(level + 1));
-    // ASSERT_EQ(new_lod_tensor.tensor(), lod_tensor->tensor());
-    // TODO(superjom) add tensor comparation here.
+ ASSERT_EQ(new_lod_tensor.data(), lod_tensor->data()); } } -TEST_F(LODTensorTester, SliceShared_Element) { - size_t level = 0; - auto new_lod_tensor = lod_tensor->SliceShared(level, 0, 2); - ASSERT_EQ(new_lod_tensor.NumLevels(), 3UL); - ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); - ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); - ASSERT_EQ(new_lod_tensor.NumElements(2), 8UL); - ASSERT_EQ(new_lod_tensor.raw_tensor(), lod_tensor->raw_tensor()); - - level = 1; - new_lod_tensor = lod_tensor->SliceShared(level, 0, 2); - ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); - ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); - ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); - ASSERT_EQ(new_lod_tensor.raw_tensor(), lod_tensor->raw_tensor()); -} - -TEST_F(LODTensorTester, SliceCopied_Element) { +TEST_F(LODTensorTester, SliceInLevel) { size_t level = 0; - auto new_lod_tensor = lod_tensor->SliceCopied(level, 0, 2, place); - ASSERT_EQ(new_lod_tensor.NumLevels(), 3UL); - ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); - ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); - ASSERT_EQ(new_lod_tensor.NumElements(2), 8UL); - ASSERT_NE(new_lod_tensor.raw_tensor(), lod_tensor->raw_tensor()); + auto new_lod_tensor = lod_tensor->SliceInLevel(level, 0, 2); + EXPECT_EQ(new_lod_tensor.NumLevels(), 3UL); + EXPECT_EQ(new_lod_tensor.NumElements(0), 2UL); + EXPECT_EQ(new_lod_tensor.NumElements(1), 4UL); + EXPECT_EQ(new_lod_tensor.NumElements(2), 8UL); + ASSERT_EQ(new_lod_tensor.data(), lod_tensor->data()); level = 1; - new_lod_tensor = lod_tensor->SliceCopied(level, 0, 2, place); + new_lod_tensor = lod_tensor->SliceInLevel(level, 0, 2); ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); - ASSERT_NE(new_lod_tensor.raw_tensor(), lod_tensor->raw_tensor()); - - level = 1; - // LOD is - // 0 5 10 - // 0 2 5 7 10 - new_lod_tensor = lod_tensor->SliceCopied(level, 1, 3, place); - ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); - ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); - ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); - - ASSERT_EQ(new_lod_tensor.lod_element(0, 0), 0UL); - ASSERT_EQ(new_lod_tensor.lod_element(0, 1), 5UL); - ASSERT_EQ(new_lod_tensor.lod_element(1, 0), 0UL); - ASSERT_EQ(new_lod_tensor.lod_element(1, 1), 2UL); - ASSERT_EQ(new_lod_tensor.lod_element(1, 2), 5UL); - ASSERT_EQ(new_lod_tensor.lod_element(1, 3), 7UL); - - // TODO(superjom) compare the content of these tensors + ASSERT_EQ(new_lod_tensor.data(), lod_tensor->data()); } TEST_F(LODTensorTester, ShareLOD) { LODTensor new_lod_tensor; - new_lod_tensor.ShareLOD(*lod_tensor); + new_lod_tensor.CopyLOD(*lod_tensor); ASSERT_EQ(new_lod_tensor.lod(), lod_tensor->lod()); } TEST_F(LODTensorTester, CopyLOD) { LODTensor new_lod_tensor; new_lod_tensor.CopyLOD(*lod_tensor); - ASSERT_NE(new_lod_tensor.lod(), lod_tensor->lod()); + bool equals = std::equal(lod_tensor->lod().begin(), lod_tensor->lod().end(), + new_lod_tensor.lod().begin()); + ASSERT_TRUE(equals); } } // namespace framework diff --git a/paddle/framework/op_desc.proto b/paddle/framework/op_desc.proto deleted file mode 100644 index d95ba26f88ae181f991440e0df30c80f80a7eb2a..0000000000000000000000000000000000000000 --- a/paddle/framework/op_desc.proto +++ /dev/null @@ -1,56 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -syntax = "proto2"; -package paddle.framework; - -import "attribute.proto"; - -// AttrDesc is used to describe Attributes of an Operator. It contain's -// name, type, and value of Attribute. -// -// e.g, for scale=3.0: name=scala, type=AttrType.FLOAT, value=3.0 -message AttrDesc { - required string name = 1; - required AttrType type = 2; - optional int32 i = 3; - optional float f = 4; - optional string s = 5; - repeated int32 ints = 6; - repeated float floats = 7; - repeated string strings = 8; -}; - -// Protocol Message to describe an Operator. -// -// In PaddlePaddle, Operator is used to do a certain computation such -// as "add", "sub", "cosine", etc. -// (1) Operator needs to know the input and output variable names. -// (2) Some ops may have special attributes such as "scale" in "CosineOp". -// -// 3rd-party language can build this proto message and call -// AddOp(const OpDesc& op_desc) of Paddle core to create an Operator. -message OpDesc { - // input names of this Operator. - repeated string inputs = 1; - - // output names of this Operator. - repeated string outputs = 2; - - // type of this Operator, such as "add", "sub", "fc". - required string type = 3; - - // Attributes of this Operator. e.g., scale=3.0 in cosine op. - repeated AttrDesc attrs = 4; -}; \ No newline at end of file diff --git a/paddle/framework/op_desc_test.cc b/paddle/framework/op_desc_test.cc deleted file mode 100644 index d0c52523b64725ee11c281b086f9ffed6a09e787..0000000000000000000000000000000000000000 --- a/paddle/framework/op_desc_test.cc +++ /dev/null @@ -1,35 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include - -TEST(OpDesc, Create) { - paddle::framework::OpDesc op_desc; - op_desc.set_type("add"); - op_desc.add_inputs("X"); - op_desc.add_inputs("Y"); - op_desc.add_outputs("Z"); - - auto attr = op_desc.mutable_attrs()->Add(); - attr->set_type(paddle::framework::AttrType::FLOAT); - attr->set_f(3.14); - - // required field name is not set, so IsInitialized should be false. - ASSERT_FALSE(op_desc.IsInitialized()); - - attr->set_name("add"); - // after all required fields are set, IsInitialized should be true now. - ASSERT_TRUE(op_desc.IsInitialized()); -} \ No newline at end of file diff --git a/paddle/framework/op_proto.proto b/paddle/framework/op_proto.proto deleted file mode 100644 index 52292162874b9ca207fb0d3917df41ade096b143..0000000000000000000000000000000000000000 --- a/paddle/framework/op_proto.proto +++ /dev/null @@ -1,116 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -// Protocol Message for 3rd-party language binding. -// -// Paddle Python package will use `OpProto` to generate op creation methods. -// The op creation methods take user's input and generate `OpDesc` proto -// message, -// then pass `OpDesc` to C++ side and create Op pointer. -// -syntax = "proto2"; -package paddle.framework; - -import "attribute.proto"; - -// Attribute protocol message for 3rd-party language binding. -// It will store the Op support what attribute and what type. -message AttrProto { - // Supported attribute name. e.g. `scale` for cosine op. - required string name = 1; - - // Supported attribute type. - required AttrType type = 2; - - // Supported attribute comments. It helps 3rd-party language generate - // doc-string. - required string comment = 3; - - // If that attribute is generated, it means the Paddle third language - // binding has responsibility to fill that attribute. End-User should - // not set that attribute. - optional bool generated = 4 [ default = false ]; -} - -// Input or output message for 3rd-party language binding. -// It contains parameter name and its comments. -message VarProto { - // Input or output name in that op creation function. - // e.g. `cos(a, b, output, ...)`, "a", "b", "output" are names. - required string name = 1; - - // The comment for that input. It helps 3rd-party language generate - // doc-string. - required string comment = 2; - - // Is that input/output could be a list or not. - // If so, that Op should write a attributed named `input_format` or - // `output_format`. - // - // e.g. - // If the op is a fc op, the inputs are `X`, `W`, `b`. The `X` and `W` - // could be multiple, so the multiple of `X` and `W` is True, and OpDesc - // will hold a attribute of them. - // - // The Op desc of same fc could be - // { - // "type": "fc", - // "input": ["X1", "X2", "W1", "W2", "b"], - // "output": "fc.out", - // "attrs" : { - // "input_format": [0, 2, 4, 5] - // } - // } - // - optional bool multiple = 3 [ default = false ]; - - // It marks that output is a temporary output. That output is not used by - // user, but used by other op internally as input. If other op is not use - // that output, it could be optimized early. - // - // Attribute temporary_index will be set in OpDesc if there is some - // outputs are temporary. - // - // output = [ "xxx.out1", "xxx.tmp", "xxx.out2"], - // attrs = { - // "temporary_index": [1] - // } - optional bool temporary = 4 [ default = false ]; - - // The gradient of operator can be ignored immediately - // e.g. operator AddOp, y = x1 + x2, the gradient of dy/dx1, dy/dx2 - // can be ignored for the future optimized on graph. - optional bool ignore_gradient = 6; -} - -// Op protocol message for 3rd-party language binding. -// It contains all information for generating op creation method. -message OpProto { - // The input information to generate op creation method. - repeated VarProto inputs = 1; - - // The output information to generate op creation method. 
-  repeated VarProto outputs = 2;
-
-  // The attribute information to generate op creation method.
-  repeated AttrProto attrs = 3;
-
-  // The comments for that Op. It helps 3rd-party language generate
-  // doc-string. The whole documentation of that Op is generated by comment,
-  // inputs, outputs, attrs together.
-  required string comment = 4;
-
-  // The type of that Op.
-  required string type = 5;
-}
diff --git a/paddle/framework/op_proto_test.cc b/paddle/framework/op_proto_test.cc
deleted file mode 100644
index 9c054bde44e77571330cbc59074705f0cfc1cfb6..0000000000000000000000000000000000000000
--- a/paddle/framework/op_proto_test.cc
+++ /dev/null
@@ -1,31 +0,0 @@
-#include 
-#include 
-
-TEST(TestOpProto, ALL) {
-  paddle::framework::OpProto proto;
-  {
-    auto ipt = proto.mutable_inputs()->Add();
-    *ipt->mutable_name() = "a";
-    *ipt->mutable_comment() = "the one input of cosine op";
-  }
-  {
-    auto ipt = proto.mutable_inputs()->Add();
-    *ipt->mutable_name() = "b";
-    *ipt->mutable_comment() = "the other input of cosine op";
-  }
-  {
-    auto opt = proto.mutable_outputs()->Add();
-    *opt->mutable_name() = "output";
-    *opt->mutable_comment() = "the output of cosine op";
-  }
-  {
-    auto attr = proto.mutable_attrs()->Add();
-    *attr->mutable_name() = "scale";
-    attr->set_type(paddle::framework::AttrType::FLOAT);
-    *attr->mutable_comment() = "the scale attribute of cosine op";
-  }
-  proto.set_type("cos");
-  *proto.mutable_comment() = "cosine op, output = scale * cos(a, b)";
-
-  ASSERT_TRUE(proto.IsInitialized());
-}
\ No newline at end of file
diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h
index cb9164eec1788c2c19176115e8687bed49d8c0b6..3b793628aa6fdb08544ba90274736c9d29262a8b 100644
--- a/paddle/framework/op_registry.h
+++ b/paddle/framework/op_registry.h
@@ -20,8 +20,9 @@ limitations under the License. */
 #include 
 #include 
 #include "paddle/framework/attribute.h"
+#include "paddle/framework/framework.pb.h"
 #include "paddle/framework/grad_op_builder.h"
-#include "paddle/framework/op_desc.pb.h"
+#include "paddle/framework/operator.h"
 #include "paddle/framework/scope.h"
 
 namespace paddle {
@@ -44,52 +45,48 @@ class OpProtoAndCheckerMaker {
 
  protected:
  struct VariableBuilder {
-    VarProto* var_;
-    std::function<void()> on_multiple_;
-    std::function<void()> on_temporary_;
+    OpProto::Var* var_;
 
-    VariableBuilder& SetMultiple() {
-      var_->set_multiple(true);
-      on_multiple_();
+    VariableBuilder& AsDuplicable() {
+      var_->set_duplicable(true);
       return *this;
     }
 
-    VariableBuilder& SetTemporary() {
-      PADDLE_ENFORCE(bool(on_temporary_), "Cannot set temporary");
-      var_->set_temporary(true);
-      on_temporary_();
+    VariableBuilder& AsIntermediate() {
+      var_->set_intermediate(true);
       return *this;
     }
 
-    VariableBuilder& IgnoreGradient() {
-      var_->set_ignore_gradient(true);
+    // TODO(FengJiayi, yuyang18): `AsNoGradient` is a very bad name, because it
+    // means that the input/output is not needed when calculating the gradient.
+    // It does not mean that there is no gradient in the backward pass. It
+    // should be renamed soon.
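    // NOTE(review): with per-variable flags replacing the old *_format
    // attributes, an op maker tags its variables by chaining these builders.
    // A hedged usage sketch (the op and variable names are illustrative only;
    // the AsDuplicable/AsIntermediate calls mirror the test changes below):
    //
    //   AddInput("X", "inputs of this op").AsDuplicable();
    //   AddOutput("InnerState", "state kept across runs").AsIntermediate();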
+ VariableBuilder& AsNoGradient() { + var_->set_no_gradient(true); return *this; } }; VariableBuilder AddInput(const std::string& name, const std::string& comment) { - VarProto* input = proto_->add_inputs(); + auto* input = proto_->add_inputs(); input->set_name(name); input->set_comment(comment); - return VariableBuilder{input, [=] { this->SetHasMultipleInput(); }, - nullptr}; + return VariableBuilder{input}; } VariableBuilder AddOutput(const std::string& name, const std::string& comment) { - VarProto* output = proto_->add_outputs(); + auto* output = proto_->add_outputs(); output->set_name(name); output->set_comment(comment); - return VariableBuilder{output, [=] { this->SetHasMultipleOutput(); }, - [=] { this->SetHasTemporaryOutput(); }}; + return VariableBuilder{output}; } template TypedAttrChecker& AddAttr(const std::string& name, const std::string& comment, bool generated = false) { - AttrProto* attr = proto_->add_attrs(); + auto* attr = proto_->add_attrs(); attr->set_name(name); attr->set_comment(comment); attr->set_generated(generated); @@ -100,53 +97,6 @@ class OpProtoAndCheckerMaker { void AddComment(const std::string& comment) { proto_->set_comment(comment); } private: - void SetHasMultiple(const std::string& in_out, bool* flag) { - if (!*flag) { - AddAttr>(in_out + "_format", - "The multiple index of " + in_out + - "\n" - R"DOC( -This attribute is used by Paddle core framework. Paddle's Op support each input -or output could be a list of variable. This attribute is used to show how that -list organized. - -e.g. - input = ["a", "b", "c", "d", "e", "f"] - input_format = [0, 4, 5, 6] - -means - The number of all input variables this op is six, and they are segmented into - three inputs. - - The first input is input[0:4], second is input[4:5], third is input[5:6]. -)DOC", - /*generated*/ true); - *flag = true; - } - } - - void SetHasMultipleInput() { SetHasMultiple("input", &has_multiple_input_); } - void SetHasMultipleOutput() { - SetHasMultiple("output", &has_multiple_output_); - } - - void SetHasTemporaryOutput() { - if (!has_temporary_output_) { - AddAttr>("temporary_index", - R"DOC(The temporary index of output. - -Not all output of Paddle Op is used by user. For faster computation, each op -could output some its internal state to other op, other op could take that -output to make compute faster. - -Add a mark to which output is temporary is helpful for future optimization. -)DOC", - /*generated*/ true) - .SetDefault(std::vector()); - has_temporary_output_ = true; - } - } - void CheckNoDuplicatedInOutAttrs() { std::unordered_set names; auto checker = [&](const std::string& name) { @@ -167,22 +117,24 @@ Add a mark to which output is temporary is helpful for future optimization. 
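  // NOTE(review): with the input_format/output_format machinery removed above,
  // the only state the maker keeps (below) is the proto, the attribute checker
  // and the validation flag; duplicability and intermediacy are now recorded
  // per variable on OpProto::Var.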
OpProto* proto_; OpAttrChecker* op_checker_; bool validated_{false}; - bool has_multiple_input_{false}; - bool has_multiple_output_{false}; - bool has_temporary_output_{false}; }; class OpRegistry { - using OpCreator = std::function; - using VarIndexMap = std::unordered_map; - using VarNameList = std::vector; + using VarNameMap = OperatorBase::VarNameMap; + using OpCreator = std::function; public: template static void RegisterOp(const std::string& op_type) { - op_creators()[op_type] = [] { return new OpType; }; + op_creators()[op_type] = []( + const std::string& type, const VarNameMap& inputs, + const VarNameMap& outputs, const AttributeMap& attrs) { + return new OpType(type, inputs, outputs, attrs); + }; OpAttrChecker& op_checker = op_checkers()[op_type]; - OpProto& op_proto = protos()[op_type]; + OpProto& op_proto = OpProtos()[op_type]; auto maker = ProtoMakerType(&op_proto, &op_checker); maker.Validate(); op_proto.set_type(op_type); @@ -190,66 +142,49 @@ class OpRegistry { op_proto.IsInitialized(), "Fail to initialize %s's OpProto, because %s is not initialized", op_type, op_proto.InitializationErrorString()); - - VarIndexMaps()[op_type].reset(new VarIndexMap()); - auto& varmap = *VarIndexMaps()[op_type]; - int idx = 0; - for (auto& var : op_proto.inputs()) { - varmap[var.name()] = idx++; - } - idx = 0; - for (auto& var : op_proto.outputs()) { - varmap[var.name()] = idx++; - } } template static void RegisterGradOp(const std::string& op_type, const std::string& grad_op_type) { - op_creators()[grad_op_type] = [] { return new GradOpType; }; + op_creators()[grad_op_type] = []( + const std::string& type, const VarNameMap& inputs, + const VarNameMap& outputs, const AttributeMap& attrs) { + return new GradOpType(type, inputs, outputs, attrs); + }; grad_ops()[op_type] = grad_op_type; } static std::shared_ptr CreateOp(const std::string& type, - const VarNameList& inputs, - const VarNameList& outputs, - const AttributeMap& attrs) { + const VarNameMap& inputs, + const VarNameMap& outputs, + AttributeMap attrs) { auto op_create_it = op_creators().find(type); PADDLE_ENFORCE(op_create_it != op_creators().end(), "Operator %s cannot be found.", type); + op_checkers().at(type).Check(attrs); - auto op = op_create_it->second(); - op->type_ = type; - op->inputs_ = inputs; - op->outputs_ = outputs; + auto op = op_create_it->second(type, inputs, outputs, attrs); - op->attrs_ = attrs; - op_checkers().at(type).Check(op->attrs_); - - GenerateTempVariableName(op); + return std::shared_ptr(op); + } - { - auto var_index_it = VarIndexMaps().find(type); - if (var_index_it != VarIndexMaps().end()) { - op->in_out_idxs_ = var_index_it->second; - } + static VarNameMap ConvertOpDescVarsToVarNameMap( + const google::protobuf::RepeatedPtrField& op_desc_vars) { + VarNameMap ret_val; + for (auto& var : op_desc_vars) { + auto& var_names = ret_val[var.parameter()]; + auto& var_names_in_proto = var.arguments(); + var_names.reserve(static_cast(var_names_in_proto.size())); + std::copy(var_names_in_proto.begin(), var_names_in_proto.end(), + std::back_inserter(var_names)); } - - op->Init(); - return std::shared_ptr(op); + return ret_val; } static std::shared_ptr CreateOp(const OpDesc& op_desc) { - std::vector inputs; - inputs.reserve((size_t)op_desc.inputs_size()); - std::copy(op_desc.inputs().begin(), op_desc.inputs().end(), - std::back_inserter(inputs)); - - std::vector outputs; - outputs.reserve((size_t)op_desc.outputs_size()); - std::copy(op_desc.outputs().begin(), op_desc.outputs().end(), - std::back_inserter(outputs)); - + 
VarNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs()); + VarNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs()); AttributeMap attrs; for (auto& attr : op_desc.attrs()) { attrs[attr.name()] = GetAttrValue(attr); @@ -262,26 +197,14 @@ class OpRegistry { PADDLE_ENFORCE(!op.IsNetOp(), "Use framework::Backward to get backward ops"); std::shared_ptr grad_op(BuildGradOp(&op)); - grad_op->Init(); return grad_op; } - static std::unordered_map& protos() { - static std::unordered_map protos_; - return protos_; - } - static std::unordered_map& grad_ops() { static std::unordered_map grad_ops_; return grad_ops_; } - static std::unordered_map>& - VarIndexMaps() { - static std::unordered_map> maps_; - return maps_; - } - static std::unordered_map& op_creators() { static std::unordered_map op_creators_; return op_creators_; @@ -292,17 +215,6 @@ class OpRegistry { static std::unordered_map op_checkers_; return op_checkers_; } - - static void GenerateTempVariableName(OperatorBase* op) { - static std::atomic gUniqId(0UL); - for (auto& outname : op->outputs_) { - if (outname == kTempVarName) { - outname += op->type_; - outname += "@"; - outname += std::to_string(gUniqId.fetch_add(1)); - } - } - } }; class Registrar { diff --git a/paddle/framework/op_registry_test.cc b/paddle/framework/op_registry_test.cc index e64126c7093a8eebc219afa4979d941ddc1afc97..0b8f8289490135b8976c38fa3fb3c2995c50416f 100644 --- a/paddle/framework/op_registry_test.cc +++ b/paddle/framework/op_registry_test.cc @@ -7,8 +7,7 @@ namespace paddle { namespace framework { class CosineOp : public OperatorBase { public: - DEFINE_OPERATOR_CTOR(CosineOp, OperatorBase) - + using OperatorBase::OperatorBase; void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const override {} void InferShape(const Scope& scope) const override {} @@ -29,8 +28,7 @@ class CosineOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { class MyTestOp : public OperatorBase { public: - DEFINE_OPERATOR_CTOR(MyTestOp, OperatorBase) - + using OperatorBase::OperatorBase; void InferShape(const Scope& scope) const override {} void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const override {} @@ -40,8 +38,8 @@ class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { public: MyTestOpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("input", "input of cosine op").SetMultiple(); - AddOutput("output", "output of cosine op").SetTemporary(); + AddInput("input", "input of cosine op").AsDuplicable(); + AddOutput("output", "output of cosine op").AsIntermediate(); auto my_checker = [](int i) { PADDLE_ENFORCE(i % 2 == 0, "'test_attr' must be even!"); }; @@ -53,6 +51,15 @@ class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { } // namespace framework } // namespace paddle +static void BuildVar(const std::string& param_name, + std::initializer_list arguments, + paddle::framework::OpDesc::Var* var) { + var->set_parameter(param_name); + for (auto& arg_name : arguments) { + var->add_arguments(arg_name); + } +} + REGISTER_OP(cos_sim, paddle::framework::CosineOp, paddle::framework::CosineOpProtoAndCheckerMaker); REGISTER_OP(my_test_op, paddle::framework::MyTestOp, @@ -61,8 +68,8 @@ REGISTER_OP(my_test_op, paddle::framework::MyTestOp, TEST(OpRegistry, CreateOp) { paddle::framework::OpDesc op_desc; op_desc.set_type("cos_sim"); - op_desc.add_inputs("aa"); - op_desc.add_outputs("bb"); + BuildVar("input", {"aa"}, op_desc.add_inputs()); + 
BuildVar("output", {"bb"}, op_desc.add_outputs()); float scale = 3.3; auto attr = op_desc.mutable_attrs()->Add(); @@ -82,8 +89,8 @@ TEST(OpRegistry, CreateOp) { TEST(OpRegistry, IllegalAttr) { paddle::framework::OpDesc op_desc; op_desc.set_type("cos_sim"); - op_desc.add_inputs("aa"); - op_desc.add_outputs("bb"); + BuildVar("input", {"aa"}, op_desc.add_inputs()); + BuildVar("output", {"bb"}, op_desc.add_outputs()); auto attr = op_desc.mutable_attrs()->Add(); attr->set_name("scale"); @@ -107,8 +114,8 @@ TEST(OpRegistry, IllegalAttr) { TEST(OpRegistry, DefaultValue) { paddle::framework::OpDesc op_desc; op_desc.set_type("cos_sim"); - op_desc.add_inputs("aa"); - op_desc.add_outputs("bb"); + BuildVar("input", {"aa"}, op_desc.add_inputs()); + BuildVar("output", {"bb"}, op_desc.add_outputs()); ASSERT_TRUE(op_desc.IsInitialized()); @@ -120,20 +127,11 @@ TEST(OpRegistry, DefaultValue) { ASSERT_EQ(op->GetAttr("scale"), 1.0); } -static void SetInputFormat(paddle::framework::OpDesc* desc) { - auto attr = desc->add_attrs(); - attr->set_name("input_format"); - attr->set_type(paddle::framework::INTS); - attr->mutable_ints()->Add(0); - attr->mutable_ints()->Add(1); -} - TEST(OpRegistry, CustomChecker) { paddle::framework::OpDesc op_desc; op_desc.set_type("my_test_op"); - op_desc.add_inputs("ii"); - op_desc.add_outputs("oo"); - SetInputFormat(&op_desc); + BuildVar("input", {"ii"}, op_desc.add_inputs()); + BuildVar("output", {"oo"}, op_desc.add_outputs()); // attr 'test_attr' is not set bool caught = false; @@ -173,7 +171,6 @@ TEST(OpRegistry, CustomChecker) { attr->set_name("test_attr"); attr->set_type(paddle::framework::AttrType::INT); attr->set_i(4); - SetInputFormat(&op_desc); auto op = paddle::framework::OpRegistry::CreateOp(op_desc); paddle::platform::CPUDeviceContext dev_ctx; paddle::framework::Scope scope; diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index d9a013b883abdec4422806f90e36da7410a4fa0c..13442a72b9d77a4858b5d91dd7690e089ec7ed49 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
 */
-#include <algorithm>
-
 #include "paddle/framework/operator.h"
+#include <algorithm>
+#include "paddle/framework/op_registry.h"
 
 namespace paddle {
 namespace framework {
@@ -33,84 +33,139 @@ ExecutionContext::GetEigenDevice() const {
 }
 #endif
 
-const std::string& OperatorBase::Input(const std::string& name) const {
-  PADDLE_ENFORCE_NOT_NULL(in_out_idxs_,
-                          "Input Output Indices could not be nullptr");
-  auto it = in_out_idxs_->find(name);
-  PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_",
-                 name);
-  if (attrs_.count("input_format") == 0) {
-    return inputs_.at((size_t)it->second);
-  } else {
-    const auto& input_format = GetAttr<std::vector<int>>("input_format");
-    int idx = input_format[it->second];
-    return inputs_.at((size_t)idx);
+static std::unordered_map<std::string, OpProto>* g_op_protos = nullptr;
+std::unordered_map<std::string, OpProto>& OpProtos() {
+  if (g_op_protos == nullptr) {
+    g_op_protos = new std::unordered_map<std::string, OpProto>();
   }
+  return *g_op_protos;
 }
 
-std::vector<std::string> OperatorBase::Inputs(const std::string& name) const {
-  PADDLE_ENFORCE_NOT_NULL(in_out_idxs_, "IO Idx could not be nullptr");
-  auto input_format = GetAttr<std::vector<int>>("input_format");
-  auto offset = in_out_idxs_->at(name);
-  PADDLE_ENFORCE(input_format.at(static_cast<size_t>(offset) + 1) <=
-                     static_cast<int>(inputs_.size()),
-                 "Input Out Of Range");
-
-  return std::vector<std::string>{
-      inputs_.begin() + input_format.at(offset),
-      inputs_.begin() + input_format.at(offset + 1)};
+const std::string& OperatorBase::Input(const std::string& name) const {
+  auto& ins = Inputs(name);
+  PADDLE_ENFORCE_EQ(ins.size(), 1UL,
+                    "Op %s input %s should contain only one variable", type_,
+                    name);
+  return ins[0];
 }
 
-const std::string& OperatorBase::Output(const std::string& name) const {
-  PADDLE_ENFORCE_NOT_NULL(in_out_idxs_, "InOut Indice could not be nullptr");
-  auto it = in_out_idxs_->find(name);
-  PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_",
+const std::vector<std::string>& OperatorBase::Inputs(
+    const std::string& name) const {
+  auto it = inputs_.find(name);
+  PADDLE_ENFORCE(it != inputs_.end(), "Op %s does not have input %s", type_,
                  name);
-  if (attrs_.count("output_format") == 0) {
-    return outputs_.at((size_t)it->second);
-  } else {
-    const auto& output_format = GetAttr<std::vector<int>>("output_format");
-    int idx = output_format[it->second];
-    return outputs_.at((size_t)idx);
-  }
+  return it->second;
+}
+
+const std::string& OperatorBase::Output(const std::string& name) const {
+  auto& outs = Outputs(name);
+  PADDLE_ENFORCE_EQ(outs.size(), 1UL,
+                    "Op %s output %s should contain only one variable", type_,
+                    name);
+  return outs[0];
 }
 
-std::vector<std::string> OperatorBase::Outputs(const std::string& name) const {
-  PADDLE_ENFORCE_NOT_NULL(in_out_idxs_, "InOut Indice could not be nullptr");
-  auto output_format = GetAttr<std::vector<int>>("output_format");
-  auto offset = in_out_idxs_->at(name);
-  PADDLE_ENFORCE(output_format.at(static_cast<size_t>(offset) + 1) <=
-                     static_cast<int>(outputs_.size()),
-                 "Output Out of Range");
-  return std::vector<std::string>{
-      outputs_.begin() + output_format.at(offset),
-      outputs_.begin() + output_format.at(offset + 1)};
+const std::vector<std::string>& OperatorBase::Outputs(
+    const std::string& name) const {
+  auto it = outputs_.find(name);
+  PADDLE_ENFORCE(it != outputs_.end(), "Op %s does not have output %s", type_,
+                 name);
+  return it->second;
 }
 
 std::string OperatorBase::DebugString() const {
   std::stringstream ss;
-  ss << "Op(" << type_ << "), inputs:(";
-  for (size_t i = 0; i < inputs_.size(); ++i) {
-    ss << inputs_[i];
-    if (i != inputs_.size() - 1) {
+  ss << "Op(" << type_ << "), inputs:{";
+  for (auto it = inputs_.begin(); it != inputs_.end();)
{ + auto& input = *it; + ss << input.first << "["; + for (size_t i = 0; i < input.second.size(); ++i) { + ss << input.second[i]; + if (i != input.second.size() - 1) { + ss << ", "; + } + } + ss << "]"; + ++it; + if (it != inputs_.end()) { ss << ", "; } } - ss << "), outputs:("; - for (size_t i = 0; i < outputs_.size(); ++i) { - ss << outputs_[i]; - if (i != outputs_.size() - 1) { + ss << "}, outputs:{"; + for (auto it = outputs_.begin(); it != outputs_.end();) { + auto& output = *it; + ss << output.first << "["; + for (size_t i = 0; i < output.second.size(); ++i) { + ss << output.second[i]; + if (i != output.second.size() - 1) { + ss << ", "; + } + } + ss << "]"; + ++it; + if (it != outputs_.end()) { ss << ", "; } } - ss << ")."; + ss << "}."; return ss.str(); } void OperatorBase::Rename(const std::string& old_name, const std::string& new_name) { - std::replace(inputs_.begin(), inputs_.end(), old_name, new_name); - std::replace(outputs_.begin(), outputs_.end(), old_name, new_name); + for (auto& input : inputs_) { + std::replace(input.second.begin(), input.second.end(), old_name, new_name); + } + for (auto& output : outputs_) { + std::replace(output.second.begin(), output.second.end(), old_name, + new_name); + } +} + +OperatorBase::OperatorBase(const std::string& type, + const OperatorBase::VarNameMap& inputs, + const OperatorBase::VarNameMap& outputs, + const AttributeMap& attrs) + : type_(type), inputs_(inputs), outputs_(outputs), attrs_(attrs) { + static std::atomic gUniqId(0UL); + for (auto& output : outputs_) { + for (auto& output_name : output.second) { + if (output_name == kTempVarName) { + output_name += type_; + output_name += "@"; + output_name += std::to_string(gUniqId.fetch_add(1)); + } + } + } +} + +std::vector OperatorBase::OutputVars(bool has_intermediate) const { + std::vector ret_val; + if (has_intermediate) { + // push all outputs into ret_val + for (auto& o : outputs_) { + ret_val.reserve(ret_val.size() + o.second.size()); + ret_val.insert(ret_val.end(), o.second.begin(), o.second.end()); + } + return ret_val; + } + auto it = OpProtos().find(type_); + PADDLE_ENFORCE( + it != OpProtos().end(), + "Operator %s not registered, cannot figure out intermediate outputs", + type_); + + // get all OpProto::Var for outputs + for (auto& o : it->second.outputs()) { + // ignore all intermediate output + if (o.intermediate()) continue; + auto out = outputs_.find(o.name()); + if (out != outputs_.end()) { + ret_val.reserve(ret_val.size() + out->second.size()); + ret_val.insert(ret_val.end(), out->second.begin(), out->second.end()); + } + } + return ret_val; } } // namespace framework diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 68e7fedcd6102435a3c30326aa91043b8abecb9e..4a72ced6ced92054eb170cd3012cafb181744953 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -20,8 +20,7 @@ limitations under the License. 
*/ #include #include "paddle/framework/attribute.h" -#include "paddle/framework/op_desc.pb.h" -#include "paddle/framework/op_proto.pb.h" +#include "paddle/framework/framework.pb.h" #include "paddle/framework/scope.h" #include "paddle/framework/tensor.h" #include "paddle/platform/device_context.h" @@ -51,6 +50,8 @@ inline std::string GradVarName(const std::string& var_name) { return var_name + kGradVarSuffix; } +extern std::unordered_map& OpProtos(); + class OperatorBase; class InferShapeContext; class ExecutionContext; @@ -63,16 +64,14 @@ class ExecutionContext; */ class OperatorBase { public: - OperatorBase() {} // TODO(yi): This constructor is to be removed. - OperatorBase(const std::string& type, const std::vector& inputs, - const std::vector& outputs, - const AttributeMap& attrs, - std::unordered_map* in_out_idxs) - : type_(type), - inputs_(inputs), - outputs_(outputs), - attrs_(attrs), - in_out_idxs_(in_out_idxs) {} + using VarNameMap = std::map>; + + OperatorBase(const std::string& type, const VarNameMap& inputs, + const VarNameMap& outputs, const AttributeMap& attrs); + + OperatorBase(const OperatorBase& o) = delete; + OperatorBase& operator=(const OperatorBase& o) = delete; + OperatorBase(OperatorBase&& o) = delete; virtual ~OperatorBase() {} @@ -85,10 +84,6 @@ class OperatorBase { virtual std::string DebugString() const; - /// Init will be called after CreateOperator, you can put some initialization - /// logic here. - virtual void Init() {} - /// InferShape infer the size of Variables used by this Operator with /// information inside scope virtual void InferShape(const Scope& scope) const = 0; @@ -107,22 +102,18 @@ class OperatorBase { //! Get a input with argument's name described in `op_proto` const std::string& Input(const std::string& name) const; //! Get a input which has multiple variables. - //! TODO add a vector_view to prevent memory copy. - std::vector Inputs(const std::string& name) const; + const std::vector& Inputs(const std::string& name) const; //! Get a output with argument's name described in `op_proto` const std::string& Output(const std::string& name) const; //! Get an output which has multiple variables. //! TODO add a vector_view to prevent memory copy. - std::vector Outputs(const std::string& name) const; + const std::vector& Outputs(const std::string& name) const; + + virtual std::vector OutputVars(bool has_intermediate) const; - const std::string Type() const { return type_; } - const std::vector Inputs() const { return inputs_; } - const std::vector Outputs() const { return outputs_; } + std::string Type() const { return type_; } const AttributeMap& Attrs() const { return attrs_; } - const std::unordered_map* InOutIdx() const { - return in_out_idxs_.get(); - } public: std::string type_; @@ -130,13 +121,12 @@ class OperatorBase { // I (Inputs) // O (Outputs) // OG (Output Gradients) - std::vector inputs_; + VarNameMap inputs_; + // NOTE: in case of OpGrad, outputs_ contains // IG (Inputs Gradients) - std::vector outputs_; + VarNameMap outputs_; AttributeMap attrs_; - // store the arguments' offset described in op_desc. 
- std::shared_ptr> in_out_idxs_; }; class InferShapeContext { @@ -144,16 +134,12 @@ class InferShapeContext { InferShapeContext(const OperatorBase& op, const Scope& scope) : op_(op), scope_(scope) {} - size_t InputSize() const { return op_.inputs_.size(); } - - size_t OutputSize() const { return op_.outputs_.size(); } - - const Variable* InputVar(const size_t index) const { - return scope_.FindVar(op_.inputs_.at(index)); + size_t InputSize(const std::string& name) const { + return op_.Inputs(name).size(); } - Variable* OutputVar(const size_t index) const { - return scope_.FindVar(op_.outputs_.at(index)); + size_t OutputSize(const std::string& name) const { + return op_.Outputs(name).size(); } const Variable* InputVar(const std::string& name) const { @@ -185,27 +171,9 @@ class InferShapeContext { return res; } - template - const T* Input(const size_t index) const { - auto var = InputVar(index); - PADDLE_ENFORCE_NOT_NULL(var, "Input(%d) should not be nullptr", index); - return &var->Get(); - } - - template - T* Output(const size_t index) const { - auto var = OutputVar(index); - PADDLE_ENFORCE_NOT_NULL( - var, - "Output(%d) not be nullptr, which means variable [%s] does not " - "exist in scope", - index, op_.outputs_[index]); - return var->GetMutable(); - } - template const T* Input(const std::string& name) const { - auto var = InputVar(name); + auto* var = InputVar(name); PADDLE_ENFORCE_NOT_NULL(var, "Input(%s) should not be nullptr", name); return &var->Get(); } @@ -281,6 +249,10 @@ class ExecutionContext : public InferShapeContext { platform::Place GetPlace() const { return device_context_->GetPlace(); } + const platform::DeviceContext* device_context() const { + return device_context_; + } + const platform::DeviceContext* device_context_; }; @@ -300,14 +272,6 @@ class OpKernel { class OperatorWithKernel : public OperatorBase { public: - OperatorWithKernel() {} // TODO(yi): This constructor is to be removed. - OperatorWithKernel(const std::string& type, - const std::vector& inputs, - const std::vector& outputs, - const AttributeMap& attrs, - std::unordered_map* in_out_idxs) - : OperatorBase(type, inputs, outputs, attrs, in_out_idxs) {} - struct OpKernelKey { platform::Place place_; @@ -331,6 +295,10 @@ class OperatorWithKernel : public OperatorBase { using OpKernelMap = std::unordered_map, OpKernelHash>; + OperatorWithKernel(const std::string& type, const VarNameMap& inputs, + const VarNameMap& outputs, const AttributeMap& attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + void InferShape(const Scope& scope) const override { InferShape(InferShapeContext(*this, scope)); } @@ -357,15 +325,5 @@ class OperatorWithKernel : public OperatorBase { virtual void InferShape(const InferShapeContext& ctx) const = 0; }; -#define DEFINE_OPERATOR_CTOR(Class, ParentClass) \ - public: \ - Class() { /* TODO(yi): This constructor is to be removed. 
*/ \ - } \ - Class(const std::string& type, const std::vector& inputs, \ - const std::vector& outputs, \ - const ::paddle::framework::AttributeMap& attrs, \ - std::unordered_map* in_out_idxs) \ - : ParentClass(type, inputs, outputs, attrs, in_out_idxs) {} - } // namespace framework } // namespace paddle diff --git a/paddle/framework/operator_test.cc b/paddle/framework/operator_test.cc index 7dbd5b14ab6ec89ae9940a3d12ec9d2b169153ad..6804841587730d51d9cfad30a9de81401d36695b 100644 --- a/paddle/framework/operator_test.cc +++ b/paddle/framework/operator_test.cc @@ -23,22 +23,22 @@ static int op_run_num = 0; class OpWithoutKernelTest : public OperatorBase { public: - DEFINE_OPERATOR_CTOR(OpWithoutKernelTest, OperatorBase) - - void Init() override { x = 1; } + OpWithoutKernelTest(const std::string& type, const VarNameMap& inputs, + const VarNameMap& outputs, const AttributeMap& attrs) + : OperatorBase(type, inputs, outputs, attrs), x(1) {} void InferShape(const Scope& scope) const override {} void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const override { - op_run_num++; - ASSERT_EQ((int)inputs_.size(), 1); - ASSERT_EQ((int)outputs_.size(), 1); - ASSERT_EQ(scope.FindVar(inputs_[0]), nullptr); + ++op_run_num; + ASSERT_EQ(static_cast(inputs_.size()), 1); + ASSERT_EQ(static_cast(outputs_.size()), 1); + ASSERT_EQ(scope.FindVar(inputs_.at("input")[0]), nullptr); ASSERT_EQ(x, 1); - ASSERT_NE(scope.FindVar(outputs_[0]), nullptr); + ASSERT_NE(scope.FindVar(outputs_.at("output")[0]), nullptr); } public: - float x = 0; + int x{0}; }; class OpeWithoutKernelTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker { @@ -56,14 +56,24 @@ class OpeWithoutKernelTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker { } // namespace framework } // namespace paddle +static void BuildVar(const std::string& param_name, + std::initializer_list arguments, + paddle::framework::OpDesc::Var* var) { + var->set_parameter(param_name); + for (auto& arg_name : arguments) { + *var->mutable_arguments()->Add() = arg_name; + } +} + REGISTER_OP(test_operator, paddle::framework::OpWithoutKernelTest, paddle::framework::OpeWithoutKernelTestProtoAndCheckerMaker); TEST(OperatorBase, all) { paddle::framework::OpDesc op_desc; op_desc.set_type("test_operator"); - *op_desc.mutable_inputs()->Add() = "IN1"; - *op_desc.mutable_outputs()->Add() = "OUT1"; + BuildVar("input", {"IN1"}, op_desc.add_inputs()); + BuildVar("output", {"OUT1"}, op_desc.add_outputs()); + auto attr = op_desc.mutable_attrs()->Add(); attr->set_name("scale"); attr->set_type(paddle::framework::AttrType::FLOAT); @@ -100,7 +110,8 @@ static int cpu_kernel_run_num = 0; class OpWithKernelTest : public OperatorWithKernel { public: - DEFINE_OPERATOR_CTOR(OpWithKernelTest, OperatorWithKernel) + using OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext& ctx) const override {} }; @@ -117,35 +128,15 @@ class CPUKernelTest : public OpKernel { } }; -// multiple inputs test -class OperatorMultiInputsTest : public OperatorBase { - public: - DEFINE_OPERATOR_CTOR(OperatorMultiInputsTest, OperatorBase) - - void Init() override { x = 1; } - void InferShape(const Scope& scope) const override {} - void Run(const Scope& scope, - const platform::DeviceContext& dev_ctx) const override { - ASSERT_EQ(scope.FindVar(inputs_[0]), nullptr); - ASSERT_EQ(x, 1); - ASSERT_NE(scope.FindVar(outputs_[0]), nullptr); - ASSERT_EQ(Input("x"), "IN1"); - ASSERT_EQ(Input("y"), "OUT1"); - } - - public: - float x = 0; -}; - class 
OpKernelTestMultiInputsProtoAndCheckerMaker : public OpProtoAndCheckerMaker { public: OpKernelTestMultiInputsProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("xs", "inputs of test op").SetMultiple(); + AddInput("xs", "inputs of test op").AsDuplicable(); AddInput("k", "input of test op"); - AddOutput("ys", "outputs of test op").SetMultiple(); + AddOutput("ys", "outputs of test op").AsDuplicable(); AddAttr("scale", "scale of cosine op") .SetDefault(1.0) .LargerThan(0.0); @@ -202,8 +193,9 @@ REGISTER_OP_CPU_KERNEL(op_with_kernel, TEST(OpKernel, all) { paddle::framework::OpDesc op_desc; op_desc.set_type("op_with_kernel"); - *op_desc.mutable_inputs()->Add() = "IN1"; - *op_desc.mutable_outputs()->Add() = "OUT1"; + BuildVar("x", {"IN1"}, op_desc.add_inputs()); + BuildVar("y", {"OUT1"}, op_desc.add_outputs()); + auto attr = op_desc.mutable_attrs()->Add(); attr->set_name("scale"); attr->set_type(paddle::framework::AttrType::FLOAT); @@ -229,32 +221,15 @@ TEST(OpKernel, multi_inputs) { OpDesc op_desc; op_desc.set_type("op_multi_inputs_with_kernel"); - *op_desc.mutable_inputs()->Add() = "x0"; - *op_desc.mutable_inputs()->Add() = "x1"; - *op_desc.mutable_inputs()->Add() = "x2"; - *op_desc.mutable_inputs()->Add() = "k0"; - *op_desc.mutable_outputs()->Add() = "y0"; - *op_desc.mutable_outputs()->Add() = "y1"; + BuildVar("xs", {"x0", "x1", "x2"}, op_desc.add_inputs()); + BuildVar("k", {"k0"}, op_desc.add_inputs()); + BuildVar("ys", {"y0", "y1"}, op_desc.add_outputs()); + auto attr = op_desc.mutable_attrs()->Add(); attr->set_name("scale"); attr->set_type(paddle::framework::AttrType::FLOAT); attr->set_f(3.14); - auto attr0 = op_desc.mutable_attrs()->Add(); - attr0->set_name("input_format"); - attr0->set_type(paddle::framework::AttrType::INTS); - auto input_format = attr0->mutable_ints(); - input_format->Add(0); // x0 - input_format->Add(3); // k - input_format->Add(4); // end - - auto attr1 = op_desc.mutable_attrs()->Add(); - attr1->set_name("output_format"); - attr1->set_type(paddle::framework::AttrType::INTS); - auto output_format = attr1->mutable_ints(); - output_format->Add(0); // y0 - output_format->Add(2); // y1 - paddle::platform::CPUDeviceContext cpu_device_context; paddle::framework::Scope scope; scope.NewVar("x0")->GetMutable(); diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index 75cd5bcb38e1d864358314c1c15b6fb59e9c3752..07b42c83717652bdf0120b3004f39ac7f7a98d06 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -56,30 +56,18 @@ void ExposeOperator(ClassType &m) { return op.type_; }) .def("outputs", - [](const typename ClassType::type &op) -> std::vector { - return op.outputs_; - }) + [](const typename ClassType::type &op) + -> std::map> { + return op.outputs_; + }) .def("inputs", - [](const typename ClassType::type &op) -> std::vector { - return op.inputs_; - }) - .def("support_gpu", &ClassType::type::SupportGPU) - .def("temp_outputs", - [](const typename ClassType::type &op) -> std::vector { - auto iter = op.attrs_.find("temporary_index"); - std::vector ret; - if (iter == op.attrs_.end()) { - return ret; - } else { - auto tmp_idx = boost::get>(iter->second); - ret.reserve(tmp_idx.size()); - for (auto &index : tmp_idx) { - ret.push_back(op.outputs_.at(index)); - } - return ret; - } + [](const typename ClassType::type &op) { return op.inputs_; }) + .def("__str__", &ClassType::type::DebugString) + .def("no_intermediate_outputs", + [](const typename ClassType::type &op) { + return 
op.OutputVars(false); }) - .def("__str__", &ClassType::type::DebugString); + .def("support_gpu", &ClassType::type::SupportGPU); } static size_t UniqueIntegerGenerator() { @@ -172,7 +160,7 @@ All parameter, weight, gradient are variables in Paddle. //! @note: Be careful! PyBind will return std::string as an unicode, not //! Python str. If you want a str object, you should cast them in Python. m.def("get_all_op_protos", []() -> std::vector { - auto &protos = OpRegistry::protos(); + auto &protos = OpProtos(); std::vector ret_values; for (auto it = protos.begin(); it != protos.end(); ++it) { PADDLE_ENFORCE(it->second.IsInitialized(), diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index cd1b4de426a49fa66dbbf8cf7d09990ac8d21227..b8c779f4e5fc7bc51298cdd35b26c2c8ac98edf6 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -105,6 +105,8 @@ class Tensor { template inline Tensor Slice(const int& begin_idx, const int& end_idx) const; + platform::Place place() const { return holder_->place(); } + private: template inline void check_memory_size() const; diff --git a/paddle/gserver/tests/LayerGradUtil.cpp b/paddle/gserver/tests/LayerGradUtil.cpp index fd9cfa1dc7a9028cb2c5c98baca98ffb2a837bac..a38880e14cdfcef05461dae567d198e5400c6bb1 100644 --- a/paddle/gserver/tests/LayerGradUtil.cpp +++ b/paddle/gserver/tests/LayerGradUtil.cpp @@ -388,14 +388,23 @@ void initDataLayer(TestConfig testConf, data.grad->zeroMem(); break; case INPUT_SELF_DEFINE_DATA: { - size_t height = testConf.inputDefs[i].selfDefinedData->getHeight(); - size_t width = testConf.inputDefs[i].selfDefinedData->getWidth(); - CHECK_GT(static_cast(height), 0); - CHECK_GT(static_cast(width), 0); - data.value = Matrix::create(height, width, false, useGpu); - data.grad = Matrix::create(height, width, false, useGpu); - data.value->copyFrom(*testConf.inputDefs[i].selfDefinedData); - data.grad->zeroMem(); + if (testConf.inputDefs[i].ids.size()) { + data.ids = IVector::create(testConf.inputDefs[i].ids.size(), useGpu); + data.ids->copyFrom(testConf.inputDefs[i].ids.data(), + testConf.inputDefs[i].ids.size()); + } else if (testConf.inputDefs[i].selfDefinedData) { + size_t height = testConf.inputDefs[i].selfDefinedData->getHeight(); + size_t width = testConf.inputDefs[i].selfDefinedData->getWidth(); + CHECK_GT(static_cast(height), 0); + CHECK_GT(static_cast(width), 0); + data.value = Matrix::create(height, width, false, useGpu); + data.grad = Matrix::create(height, width, false, useGpu); + data.value->copyFrom(*testConf.inputDefs[i].selfDefinedData); + data.grad->zeroMem(); + } else { + LOG(FATAL) << "No self-defined data are given."; + return; + } const std::vector& labelSeqStartPositions = testConf.inputDefs[i].labelSeqStartPositions; diff --git a/paddle/gserver/tests/LayerGradUtil.h b/paddle/gserver/tests/LayerGradUtil.h index 5debedf5ef6a3262578ca01b335e664f9a334d35..88e831f78bd165f63806df6c081d84411be51502 100644 --- a/paddle/gserver/tests/LayerGradUtil.h +++ b/paddle/gserver/tests/LayerGradUtil.h @@ -68,6 +68,7 @@ struct InputDef { std::vector labelInitValue; std::vector labelSeqStartPositions; std::vector labelSubSeqStartPositions; + std::vector ids; MatrixPtr selfDefinedData; InputDef(InputType type, string nameIn, size_t dimIn, size_t sizeIn) { @@ -95,6 +96,23 @@ struct InputDef { isStatic = false; } + InputDef(InputType type, + string nameIn, + const std::vector& ids, + const std::vector& selfDefinedSeqStartPos = {}, + const std::vector& selfDefinedSubSeqStartPos = {}) + : 
labelSeqStartPositions(selfDefinedSeqStartPos), + labelSubSeqStartPositions(selfDefinedSubSeqStartPos), + ids(ids) { + selfDefinedData = nullptr; + inputType = type; + name = nameIn; + dim = 0; + sparse = {""}; + paraSize = 0; + isStatic = false; + } + InputDef(InputType type, string nameIn, size_t dimIn, diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index c181bd7b881c08dfd80d640b1ddce10b3c74d758..373611cc0ee952de813f01d32d1516e1a8384750 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -41,6 +41,7 @@ function(op_library TARGET) endif() endfunction() +add_subdirectory(math) cc_test(gather_test SRCS gather_test.cc DEPS tensor) cc_library(net_op SRCS net_op.cc DEPS op_registry) @@ -50,7 +51,7 @@ op_library(add_op SRCS add_op.cc add_op.cu) op_library(mean_op SRCS mean_op.cc mean_op.cu) -op_library(mul_op SRCS mul_op.cc mul_op.cu) +op_library(mul_op SRCS mul_op.cc mul_op.cu DEPS math_function) op_library(rowwise_add_op SRCS rowwise_add_op.cu rowwise_add_op.cc) op_library(sigmoid_op SRCS sigmoid_op.cc sigmoid_op.cu) @@ -62,7 +63,7 @@ op_library(fill_zeros_like_op SRCS fill_zeros_like_op.cc fill_zeros_like_op.cu) op_library(sgd_op SRCS sgd_op.cc sgd_op.cu) op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc - DEPS op_desc tensor op_registry operator net_op) + DEPS framework_proto tensor op_registry operator net_op) cc_test(recurrent_op_test SRCS recurrent_op_test.cc DEPS recurrent_op gtest mul_op add_op) op_library(uniform_random_op SRCS uniform_random_op.cc uniform_random_op.cu) diff --git a/paddle/operators/add_op.cc b/paddle/operators/add_op.cc index b886ded9bbd97dc1942c87d7603521e8d72e3f6c..c1f647a88e4547d96bbb9143cdb2cb07bc291635 100644 --- a/paddle/operators/add_op.cc +++ b/paddle/operators/add_op.cc @@ -18,17 +18,15 @@ namespace paddle { namespace operators { class AddOp : public framework::OperatorWithKernel { - DEFINE_OPERATOR_CTOR(AddOp, framework::OperatorWithKernel) + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE_EQ(ctx.InputSize(), 2); - PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), "Inputs of AddOp must all be set"); - PADDLE_ENFORCE(ctx.OutputVar(0) != nullptr, - "Outputs of AddOp must all be set"); - PADDLE_ENFORCE(ctx.Input(0)->dims() == ctx.Input(1)->dims(), - "Two input of Add Op's dimension must be same."); - ctx.Output(0)->Resize(ctx.Input(0)->dims()); + PADDLE_ENFORCE_EQ(ctx.Input("X")->dims(), + ctx.Input("Y")->dims(), + "Two input of Add Op's dimension must be same."); + ctx.Output("Out")->Resize(ctx.Input("X")->dims()); } }; @@ -48,7 +46,9 @@ The equation is: Out = X + Y }; class AddOpGrad : public framework::OperatorWithKernel { - DEFINE_OPERATOR_CTOR(AddOpGrad, framework::OperatorWithKernel) + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override {} }; diff --git a/paddle/operators/add_op.h b/paddle/operators/add_op.h index d76c10957e943deb970b1d79a1507a36669314e3..a7307b6818aa3d10ff215d06281e2b53196fd101 100644 --- a/paddle/operators/add_op.h +++ b/paddle/operators/add_op.h @@ -28,9 +28,9 @@ template class AddKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto input0 = context.Input(0); - auto input1 = context.Input(1); - auto output = 
context.Output(0); + auto* input0 = context.Input("X"); + auto* input1 = context.Input("Y"); + auto* output = context.Output("Out"); output->mutable_data(context.GetPlace()); diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/cross_entropy_op.cc index 09aa589d3caf7ed7b790850b515d49afdd3e1467..597c71d4e042e6b6a752c0b1819b909a7a9faa75 100644 --- a/paddle/operators/cross_entropy_op.cc +++ b/paddle/operators/cross_entropy_op.cc @@ -18,29 +18,25 @@ namespace paddle { namespace operators { class OnehotCrossEntropyOp : public framework::OperatorWithKernel { - DEFINE_OPERATOR_CTOR(OnehotCrossEntropyOp, framework::OperatorWithKernel) + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE_EQ(ctx.InputSize(), 2, - "Input size of OnehotCrossEntropyOp must be two"); - PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1, - "Output size of OnehotCrossEntropyOp must be one"); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), - "0-th input of OnehotCrossEntropyOp should be set"); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(1), - "1-th input of OnehotCrossEntropyOp should be set"); - PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar(0), - "Outputs of OnehotCrossEntropyOp must all be set"); - PADDLE_ENFORCE_EQ(ctx.Input(0)->dims().size(), 2); - PADDLE_ENFORCE_EQ(ctx.Output(0)->dims().size(), 1, - "label's dimension must be 1."); - ctx.Output(0)->Resize({ctx.Input(0)->dims()[0]}); + auto *X = ctx.Input("X"); + auto *label = ctx.Input("label"); + + PADDLE_ENFORCE_EQ(X->dims().size(), 2, "X's dimension must be 2."); + PADDLE_ENFORCE_EQ(label->dims().size(), 1, "label's dimension must be 1."); + PADDLE_ENFORCE_EQ(X->dims()[0], label->dims()[0]); + ctx.Output("Y")->Resize({X->dims()[0]}); } }; class OnehotCrossEntropyGradientOp : public framework::OperatorWithKernel { - DEFINE_OPERATOR_CTOR(OnehotCrossEntropyGradientOp, - framework::OperatorWithKernel) + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { auto X_grad = ctx.Output(framework::GradVarName("X")); diff --git a/paddle/operators/cross_entropy_op.h b/paddle/operators/cross_entropy_op.h index d1bbc2cb66d6ce84ddcdcb87648f23c6ce77b748..b7df92c9a98ebf12b72a8d3d8e8e4e1a950f06c9 100644 --- a/paddle/operators/cross_entropy_op.h +++ b/paddle/operators/cross_entropy_op.h @@ -45,7 +45,7 @@ class OnehotCrossEntropyOpKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto X = ctx.Input("X"); const T* Xdata = X->data(); - const int* label_data = ctx.Input(1)->data(); + const int* label_data = ctx.Input("label")->data(); auto Y = ctx.Output("Y"); Y->mutable_data(ctx.GetPlace()); diff --git a/paddle/operators/fill_zeros_like_op.cc b/paddle/operators/fill_zeros_like_op.cc index eda23a0ccfacd3a620412876e18f4ec47652bf9d..e42e33f1a3759ae26cee987d0b68a55b672e3f94 100644 --- a/paddle/operators/fill_zeros_like_op.cc +++ b/paddle/operators/fill_zeros_like_op.cc @@ -18,19 +18,13 @@ namespace paddle { namespace operators { class FillZerosLikeOp : public framework::OperatorWithKernel { - DEFINE_OPERATOR_CTOR(FillZerosLikeOp, framework::OperatorWithKernel) + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE_EQ(ctx.InputSize(), 1UL, - "Input size of FillZerosLikeOp must be one."); - 
PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1UL, - "Output size of AddOp must be one."); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), - "Input of FillZerosLikeOp must be set."); - PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar(0), - "Output of FillZerosLikeOp must be set."); - ctx.Output(0)->Resize( - ctx.Input(0)->dims()); + ctx.Output("Dst")->Resize( + ctx.Input("Src")->dims()); } }; diff --git a/paddle/operators/fill_zeros_like_op.h b/paddle/operators/fill_zeros_like_op.h index f846c7a8ab15e2cd997564edb36660a1360227a8..fd380ca8514b0ac50f39613368a4836bd485668b 100644 --- a/paddle/operators/fill_zeros_like_op.h +++ b/paddle/operators/fill_zeros_like_op.h @@ -23,7 +23,7 @@ template class FillZerosLikeKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* output = context.Output(0); + auto* output = context.Output("Dst"); output->mutable_data(context.GetPlace()); auto t = framework::EigenVector::Flatten(*output); t.device(context.GetEigenDevice()) = t.constant(T(0)); diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc index 893cf56e5cf0d99d3f3bfffe98734a868f9b7595..75249c08eb00095615fc75eb9261432d64246b2e 100644 --- a/paddle/operators/gaussian_random_op.cc +++ b/paddle/operators/gaussian_random_op.cc @@ -43,7 +43,9 @@ class GaussianRandomKernel : public framework::OpKernel { }; class GaussianRandomOp : public framework::OperatorWithKernel { - DEFINE_OPERATOR_CTOR(GaussianRandomOp, framework::OperatorWithKernel) + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext& context) const override { auto* tensor = context.Output(0); diff --git a/paddle/operators/math/CMakeLists.txt b/paddle/operators/math/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..abcaf940ab0128d6948acc620d678632c8f48960 --- /dev/null +++ b/paddle/operators/math/CMakeLists.txt @@ -0,0 +1,13 @@ +if(WITH_MKLML) + set(BLAS_LIB mklml) +else() + set(BLAS_LIB cblas) +endif() + +if(WITH_GPU) + nv_library(math_function SRCS math_function.cc math_function.cu DEPS ${BLAS_LIB} device_context) +else() + cc_library(math_function SRCS math_function.cc DEPS ${BLAS_LIB} device_context) +endif() + +nv_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor) diff --git a/paddle/operators/math/math_function.cc b/paddle/operators/math/math_function.cc new file mode 100644 index 0000000000000000000000000000000000000000..affdd1ac2cd486930881ee6b34a4b32f41df7ee9 --- /dev/null +++ b/paddle/operators/math/math_function.cc @@ -0,0 +1,114 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/
+
+#include "paddle/operators/math/math_function.h"
+
+namespace paddle {
+namespace operators {
+namespace math {
+
+template <>
+void gemm<platform::CPUPlace, float>(
+    const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB, const int M,
+    const int N, const int K, const float alpha, const float* A,
+    const float* B, const float beta, float* C,
+    platform::DeviceContext* context) {
+  int lda = K;
+  int ldb = N;
+  int ldc = N;
+  cblas_sgemm(CblasRowMajor, transA, transB, M, N, K, alpha, A, lda, B, ldb,
+              beta, C, ldc);
+}
+
+template <>
+void gemm<platform::CPUPlace, double>(
+    const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB, const int M,
+    const int N, const int K, const double alpha, const double* A,
+    const double* B, const double beta, double* C,
+    platform::DeviceContext* context) {
+  int lda = K;
+  int ldb = N;
+  int ldc = N;
+  cblas_dgemm(CblasRowMajor, transA, transB, M, N, K, alpha, A, lda, B, ldb,
+              beta, C, ldc);
+}
+
+template <>
+void matmul<platform::CPUPlace, float>(
+    const framework::Tensor& matrix_a, bool trans_a,
+    const framework::Tensor& matrix_b, bool trans_b, float alpha,
+    framework::Tensor* matrix_out, float beta,
+    platform::DeviceContext* context) {
+  auto dim_a = matrix_a.dims();
+  auto dim_b = matrix_b.dims();
+  auto dim_out = matrix_out->dims();
+  PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 && dim_out.size() == 2,
+                 "The input and output of matmul be matrix");
+
+  PADDLE_ENFORCE(platform::is_cpu_place(matrix_a.place()) &&
+                     platform::is_cpu_place(matrix_b.place()) &&
+                     platform::is_cpu_place(matrix_out->place()),
+                 "Matrix must all be in CPUPlace");
+
+  int M = dim_out[0];
+  int N = dim_out[1];
+  int K = (trans_a == false) ? dim_a[1] : dim_a[0];
+
+  CBLAS_TRANSPOSE transA = (trans_a == false) ? CblasNoTrans : CblasTrans;
+  CBLAS_TRANSPOSE transB = (trans_b == false) ? CblasNoTrans : CblasTrans;
+
+  gemm<platform::CPUPlace, float>(
+      transA, transB, M, N, K, alpha, matrix_a.data<float>(),
+      matrix_b.data<float>(), beta, matrix_out->data<float>(), context);
+}
+
+template <>
+void matmul<platform::CPUPlace, double>(
+    const framework::Tensor& matrix_a, bool trans_a,
+    const framework::Tensor& matrix_b, bool trans_b, double alpha,
+    framework::Tensor* matrix_out, double beta,
+    platform::DeviceContext* context) {
+  auto dim_a = matrix_a.dims();
+  auto dim_b = matrix_b.dims();
+  auto dim_out = matrix_out->dims();
+  PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 && dim_out.size() == 2,
+                 "The input and output of matmul be matrix");
+
+  PADDLE_ENFORCE(platform::is_cpu_place(matrix_a.place()) &&
+                     platform::is_cpu_place(matrix_b.place()) &&
+                     platform::is_cpu_place(matrix_out->place()),
+                 "Matrix must all be in CPUPlace");
+
+  int M = dim_out[0];
+  int N = dim_out[1];
+  int K = (trans_a == false) ? dim_a[1] : dim_a[0];
+
+  CBLAS_TRANSPOSE transA = (trans_a == false) ? CblasNoTrans : CblasTrans;
+  CBLAS_TRANSPOSE transB = (trans_b == false) ? CblasNoTrans : CblasTrans;
+
+  gemm<platform::CPUPlace, double>(
+      transA, transB, M, N, K, alpha, matrix_a.data<double>(),
+      matrix_b.data<double>(), beta, matrix_out->data<double>(), context);
+}
+
+}  // namespace math
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/operators/math/math_function.cu b/paddle/operators/math/math_function.cu
new file mode 100644
index 0000000000000000000000000000000000000000..da40b27c948918e4997f4a046d2145552296158b
--- /dev/null
+++ b/paddle/operators/math/math_function.cu
@@ -0,0 +1,127 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
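A minimal usage sketch for the CPU specializations above (not part of the patch; it assumes the `mutable_data` and `matmul` signatures shown in this file, and that a default-constructed `CPUDeviceContext` may stand in for the `DeviceContext*` parameter):

.. code-block:: cpp

    // Hedged sketch: out(2, 4) = a(2, 3) * b(3, 4) on the CPU path.
    // All names come from the patch; the wiring is illustrative only.
    paddle::framework::Tensor a, b, out;
    paddle::platform::CPUPlace place;
    a.mutable_data<float>({2, 3}, place);    // M = 2, K = 3
    b.mutable_data<float>({3, 4}, place);    // K = 3, N = 4
    out.mutable_data<float>({2, 4}, place);  // M = 2, N = 4
    paddle::platform::CPUDeviceContext ctx;
    paddle::operators::math::matmul<paddle::platform::CPUPlace, float>(
        a, false, b, false, 1.0f, &out, 0.0f, &ctx);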
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/math/math_function.h" + +namespace paddle { +namespace operators { +namespace math { + +template <> +void gemm(const CBLAS_TRANSPOSE transA, + const CBLAS_TRANSPOSE transB, const int M, + const int N, const int K, + const float alpha, const float* A, + const float* B, const float beta, float* C, + platform::DeviceContext* context) { + // Note that cublas follows fortran order, so the order is different from + // the cblas convention. + int lda = (transA == CblasNoTrans) ? K : M; + int ldb = (transB == CblasNoTrans) ? N : K; + cublasOperation_t cuTransA = + (transA == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T; + cublasOperation_t cuTransB = + (transB == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T; + + PADDLE_ENFORCE(platform::dynload::cublasSgemm( + reinterpret_cast(context)->cublas_handle(), + cuTransB, cuTransA, N, M, K, &alpha, B, ldb, A, lda, &beta, C, N)); +} + +template <> +void gemm(const CBLAS_TRANSPOSE transA, + const CBLAS_TRANSPOSE transB, const int M, + const int N, const int K, + const double alpha, const double* A, + const double* B, const double beta, + double* C, + platform::DeviceContext* context) { + // Note that cublas follows fortran order, so the order is different from + // the cblas convention. + int lda = (transA == CblasNoTrans) ? K : M; + int ldb = (transB == CblasNoTrans) ? N : K; + cublasOperation_t cuTransA = + (transA == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T; + cublasOperation_t cuTransB = + (transB == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T; + PADDLE_ENFORCE(platform::dynload::cublasDgemm( + reinterpret_cast(context)->cublas_handle(), + cuTransB, cuTransA, N, M, K, &alpha, B, ldb, A, lda, &beta, C, N)); +} + +template <> +void matmul(const framework::Tensor& matrix_a, + bool trans_a, + const framework::Tensor& matrix_b, + bool trans_b, float alpha, + framework::Tensor* matrix_out, + float beta, + platform::DeviceContext* context) { + auto dim_a = matrix_a.dims(); + auto dim_b = matrix_b.dims(); + auto dim_out = matrix_out->dims(); + PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 && dim_out.size() == 2, + "The input and output of matmul be matrix"); + + PADDLE_ENFORCE(platform::is_gpu_place(matrix_a.place()) && + platform::is_gpu_place(matrix_b.place()) && + platform::is_gpu_place(matrix_out->place()), + "Matrix must all be in GPUPlace"); + + int M = dim_out[0]; + int N = dim_out[1]; + int K = (trans_a == false) ? dim_a[1] : dim_a[0]; + + CBLAS_TRANSPOSE transA = (trans_a == false) ? CblasNoTrans : CblasTrans; + CBLAS_TRANSPOSE transB = (trans_b == false) ? 
CblasNoTrans : CblasTrans; + + gemm( + transA, transB, M, N, K, alpha, matrix_a.data(), + matrix_b.data(), beta, matrix_out->data(), context); +} + +template <> +void matmul(const framework::Tensor& matrix_a, + bool trans_a, + const framework::Tensor& matrix_b, + bool trans_b, double alpha, + framework::Tensor* matrix_out, + double beta, + platform::DeviceContext* context) { + auto dim_a = matrix_a.dims(); + auto dim_b = matrix_b.dims(); + auto dim_out = matrix_out->dims(); + PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 && dim_out.size() == 2, + "The input and output of matmul be matrix"); + + PADDLE_ENFORCE(platform::is_gpu_place(matrix_a.place()) && + platform::is_gpu_place(matrix_b.place()) && + platform::is_gpu_place(matrix_out->place()), + "Matrix must all be in GPUPlace"); + + int M = dim_out[0]; + int N = dim_out[1]; + int K = (trans_a == false) ? dim_a[1] : dim_a[0]; + + CBLAS_TRANSPOSE transA = (trans_a == false) ? CblasNoTrans : CblasTrans; + CBLAS_TRANSPOSE transB = (trans_b == false) ? CblasNoTrans : CblasTrans; + + gemm( + transA, transB, M, N, K, alpha, matrix_a.data(), + matrix_b.data(), beta, matrix_out->data(), context); +} + +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/math_function.h b/paddle/operators/math/math_function.h new file mode 100644 index 0000000000000000000000000000000000000000..155589fadb3ed9f59160a750d546dd8093a56cbe --- /dev/null +++ b/paddle/operators/math/math_function.h @@ -0,0 +1,82 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
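The cuBLAS wrappers above need no explicit transpose pass: cuBLAS assumes column-major (Fortran) storage, and a row-major matrix reinterpreted as column-major is exactly its transpose. The calls therefore exploit the identity::

    C = A * B   <=>   C^T = B^T * A^T

passing B before A and swapping M with N, so the column-major C^T that cuBLAS writes out is bit-for-bit the row-major C the caller expects; lda and ldb are each operand's stored row length, as computed at the top of gemm.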
*/ + +#pragma once +#ifdef PADDLE_USE_MKLML +#include +#include +#include +#endif + +#ifdef PADDLE_USE_MKL +#include +#include +#endif + +#ifdef PADDLE_USE_ATLAS +extern "C" { +#include +#include +} +#endif + +#ifdef PADDLE_USE_OPENBLAS +#include +#include +#endif + +#ifndef LAPACK_FOUND +extern "C" { +#include +int LAPACKE_sgetrf(int matrix_layout, int m, int n, float* a, int lda, + int* ipiv); +int LAPACKE_dgetrf(int matrix_layout, int m, int n, double* a, int lda, + int* ipiv); +int LAPACKE_sgetri(int matrix_layout, int n, float* a, int lda, + const int* ipiv); +int LAPACKE_dgetri(int matrix_layout, int n, double* a, int lda, + const int* ipiv); +} +#endif + +#include + +#include "paddle/framework/tensor.h" +#include "paddle/platform/device_context.h" +#include "paddle/platform/enforce.h" + +namespace paddle { +namespace operators { +namespace math { + +// Support continuous memory now +// If transA = N, and transB = N +// Then matrixA: M * K, matrixB: K * N matrixC : M * N +// For more detailed info, please refer to +// http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html +template +void gemm(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB, + const int M, const int N, const int K, const T alpha, const T* A, + const T* B, const T beta, T* C, platform::DeviceContext* context); + +// matrix multiply with continuous memory +template +void matmul(const framework::Tensor& matrix_a, bool trans_a, + const framework::Tensor& matrix_b, bool trans_b, T alpha, + framework::Tensor* matrix_out, T beta, + platform::DeviceContext* context); + +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/math_function_test.cc b/paddle/operators/math/math_function_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..6c020c4ff7285b43bc5836d80c173d3a068e72b3 --- /dev/null +++ b/paddle/operators/math/math_function_test.cc @@ -0,0 +1,75 @@ +#include "paddle/operators/math/math_function.h" +#include "gtest/gtest.h" + +#ifndef PADDLE_ONLY_CPU +TEST(math_function, notrans_mul_trans) { + paddle::framework::Tensor input1; + paddle::framework::Tensor input1_gpu; + paddle::framework::Tensor input2_gpu; + paddle::framework::Tensor out_gpu; + paddle::framework::Tensor out; + + auto* cpu_place = new paddle::platform::CPUPlace(); + float* input1_ptr = input1.mutable_data({2, 3}, *cpu_place); + float arr[6] = {0, 1, 2, 3, 4, 5}; + memcpy(input1_ptr, arr, 6 * sizeof(float)); + + auto* gpu_place = new paddle::platform::GPUPlace(0); + paddle::platform::DeviceContext* context = + new paddle::platform::CUDADeviceContext(*gpu_place); + + input1_gpu.CopyFrom(input1, *gpu_place); + input2_gpu.CopyFrom(input1, *gpu_place); + + out_gpu.mutable_data({2, 2}, *gpu_place); + + paddle::operators::math::matmul( + input1_gpu, false, input2_gpu, true, 1, &out_gpu, 0, context); + + out.CopyFrom(out_gpu, *cpu_place); + + float* out_ptr = out.data(); + EXPECT_EQ(out_ptr[0], 5); + EXPECT_EQ(out_ptr[1], 14); + EXPECT_EQ(out_ptr[2], 14); + EXPECT_EQ(out_ptr[3], 50); +} + +TEST(math_function, trans_mul_notrans) { + paddle::framework::Tensor input1; + paddle::framework::Tensor input1_gpu; + paddle::framework::Tensor input2_gpu; + paddle::framework::Tensor out_gpu; + paddle::framework::Tensor out; + + auto* cpu_place = new paddle::platform::CPUPlace(); + float* input1_ptr = input1.mutable_data({2, 3}, *cpu_place); + float arr[6] = {0, 1, 2, 3, 4, 5}; + memcpy(input1_ptr, arr, 6 * sizeof(float)); + + auto* gpu_place = new paddle::platform::GPUPlace(0); + 
paddle::platform::DeviceContext* context = + new paddle::platform::CUDADeviceContext(*gpu_place); + + input1_gpu.CopyFrom(input1, *gpu_place); + input2_gpu.CopyFrom(input1, *gpu_place); + + out_gpu.mutable_data({3, 3}, *gpu_place); + + paddle::operators::math::matmul( + input1_gpu, true, input2_gpu, false, 1, &out_gpu, 0, context); + + out.CopyFrom(out_gpu, *cpu_place); + + float* out_ptr = out.data(); + EXPECT_EQ(out_ptr[0], 9); + EXPECT_EQ(out_ptr[1], 12); + EXPECT_EQ(out_ptr[2], 15); + EXPECT_EQ(out_ptr[3], 12); + EXPECT_EQ(out_ptr[4], 17); + EXPECT_EQ(out_ptr[5], 22); + EXPECT_EQ(out_ptr[6], 15); + EXPECT_EQ(out_ptr[7], 22); + EXPECT_EQ(out_ptr[8], 29); +} +#endif diff --git a/paddle/operators/mean_op.cc b/paddle/operators/mean_op.cc index f6abba7ab45728f74dcea1363035a729b2cd1d03..35e7212dde210a50285272cfd94118fa34fb7cd9 100644 --- a/paddle/operators/mean_op.cc +++ b/paddle/operators/mean_op.cc @@ -18,14 +18,14 @@ namespace paddle { namespace operators { class MeanOp : public framework::OperatorWithKernel { - DEFINE_OPERATOR_CTOR(MeanOp, framework::OperatorWithKernel) + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE_EQ(ctx.InputSize(), 1, "Input size of AddOp must be one"); - PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1, "Output size of AddOp must be one"); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), "input should be set"); - PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar(0), "output should be set"); - ctx.Output(0)->Resize(framework::make_ddim({1})); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input of MeanOp must be initialized."); + ctx.Output("Out")->Resize({1}); } }; @@ -34,13 +34,15 @@ class MeanOpMaker : public framework::OpProtoAndCheckerMaker { MeanOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The input of mean op"); - AddOutput("Out", "The output of mean op").IgnoreGradient(); + AddOutput("Out", "The output of mean op").AsNoGradient(); AddComment("Mean Operator"); } }; class MeanGradOp : public framework::OperatorWithKernel { - DEFINE_OPERATOR_CTOR(MeanGradOp, framework::OperatorWithKernel) + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { ctx.Output(framework::GradVarName("X")) diff --git a/paddle/operators/mean_op.h b/paddle/operators/mean_op.h index e8595a14faa7c1b03734f814c78f9cbf1819fbb5..fcb703e63bd5a82f9ffac2bf64e06fd0218dbdaa 100644 --- a/paddle/operators/mean_op.h +++ b/paddle/operators/mean_op.h @@ -31,14 +31,14 @@ template class MeanKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto input = context.Input(0); - auto output = context.Output(0); + auto* input = context.Input("X"); + auto* output = context.Output("Out"); output->mutable_data(context.GetPlace()); auto X = EigenVector::Flatten(*input); auto y = EigenScalar::From(*output); - auto place = context.GetEigenDevice(); + auto& place = context.GetEigenDevice(); y.device(place) = X.mean(); } diff --git a/paddle/operators/mul_op.cc b/paddle/operators/mul_op.cc index 6115a3f3332dba419b56e74a737627483448a715..032d234197c12fe107fb195e862c160948ee354c 100644 --- a/paddle/operators/mul_op.cc +++ b/paddle/operators/mul_op.cc @@ -13,17 +13,19 @@ limitations under the License. 
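The expected values in the two math_function tests above follow directly from A = [[0, 1, 2], [3, 4, 5]]. In notrans_mul_trans, out = A * A^T is the 2 x 2 matrix of row dot products::

    [0,1,2].[0,1,2] = 5     [0,1,2].[3,4,5] = 14
    [3,4,5].[0,1,2] = 14    [3,4,5].[3,4,5] = 50

In trans_mul_notrans, out = A^T * A is the 3 x 3 Gram matrix of the columns (0,3), (1,4), (2,5), giving rows (9, 12, 15), (12, 17, 22), (15, 22, 29): exactly the nine EXPECT_EQ values.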
*/ #include "paddle/operators/mul_op.h" +#include "paddle/operators/math/math_function.h" namespace paddle { namespace operators { class MulOp : public framework::OperatorWithKernel { - DEFINE_OPERATOR_CTOR(MulOp, framework::OperatorWithKernel) + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE(ctx.InputSize() == 2, "The mul op must take two inputs"); - auto dim0 = ctx.Input(0)->dims(); - auto dim1 = ctx.Input(1)->dims(); + auto dim0 = ctx.Input("X")->dims(); + auto dim1 = ctx.Input("Y")->dims(); PADDLE_ENFORCE_EQ(dim0.size(), 2, "input X(%s) should be a tensor with 2 dims, a matrix", ctx.op_.Input("X")); @@ -33,8 +35,7 @@ class MulOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( dim0[1], dim1[0], "First matrix's width must be equal with second matrix's height."); - PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1, "The mul op takes only one output"); - ctx.Output(0)->Resize({dim0[0], dim1[1]}); + ctx.Output("Out")->Resize({dim0[0], dim1[1]}); } }; @@ -54,7 +55,9 @@ The equation is: Out = X * Y }; class MulOpGrad : public framework::OperatorWithKernel { - DEFINE_OPERATOR_CTOR(MulOpGrad, framework::OperatorWithKernel) + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override {} std::string DebugString() const override { diff --git a/paddle/operators/mul_op.cu b/paddle/operators/mul_op.cu index 43debbc21a365a15c914e60e151f7782b82080cb..346a7e505d123b5e4e831daa39a1f6349b3dcccf 100644 --- a/paddle/operators/mul_op.cu +++ b/paddle/operators/mul_op.cu @@ -16,5 +16,4 @@ #include "paddle/operators/mul_op.h" namespace ops = paddle::operators; - REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel); diff --git a/paddle/operators/mul_op.h b/paddle/operators/mul_op.h index ab12631c03453a18fbb067e2d12c2bc332acd567..b7812fd1a7a72f5ce543e18c8b7b5b51deff2204 100644 --- a/paddle/operators/mul_op.h +++ b/paddle/operators/mul_op.h @@ -13,6 +13,9 @@ limitations under the License. 
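Because MulOp now resolves its operands by name, a mul instance can be described purely by parameter-to-variable maps. A hedged sketch, assuming the map-taking OpRegistry::CreateOp overload that net_op_test.cc and the commented-out calls in recurrent_op_test.cc rely on later in this patch:

.. code-block:: cpp

    // Hedged sketch: the variable names "x", "w" and "out" are illustrative.
    auto mul = paddle::framework::OpRegistry::CreateOp(
        "mul",
        {{"X", {"x"}}, {"Y", {"w"}}},  // inputs: parameter -> arguments
        {{"Out", {"out"}}},            // outputs
        {});                           // attributes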
*/ #pragma once + +#include "paddle/operators/math/math_function.h" + #include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" @@ -30,17 +33,14 @@ class MulKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { Eigen::array, 1> dim_pair = { {Eigen::IndexPair(1, 0)}}; - - auto input0 = context.Input("X"); - auto input1 = context.Input("Y"); - auto output = context.Output(0); - + auto* input0 = context.Input("X"); + auto* input1 = context.Input("Y"); + auto* output = context.Output("Out"); output->mutable_data(context.GetPlace()); - auto X = EigenMatrix::From(*input0); auto Y = EigenMatrix::From(*input1); auto Z = EigenMatrix::From(*output); - auto place = context.GetEigenDevice(); + auto& place = context.GetEigenDevice(); Z.device(place) = X.contract(Y, dim_pair); } diff --git a/paddle/operators/net_op.cc b/paddle/operators/net_op.cc index a466c4f30fe87db4ad2a44518e083b57f3cbc2ed..1d1b290440ec125bdb5b190745735dd077261731 100644 --- a/paddle/operators/net_op.cc +++ b/paddle/operators/net_op.cc @@ -15,48 +15,42 @@ */ #include "paddle/operators/net_op.h" +#include +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +const char NetOp::kAll[] = "all"; + void NetOp::CompleteAddOp(bool calc) { add_op_done_ = true; if (!calc) return; - std::unordered_set input_set; - std::unordered_set output_set; - std::unordered_set temp_output; + std::set input_set; + std::set output_set; for (auto& op : ops_) { for (auto& ipt : op->inputs_) { - if (!Contains(output_set, ipt)) { // Not other op's output - input_set.insert(ipt); - } else { - temp_output.insert(ipt); + for (auto& var_name : ipt.second) { + if (!Contains(output_set, var_name)) { // Not other op's output + input_set.insert(var_name); + } else { + intermediate_outputs_.insert(var_name); + } } } for (auto& opt : op->outputs_) { - output_set.insert(opt); - } - } - - inputs_.reserve(input_set.size()); - std::copy(input_set.begin(), input_set.end(), std::back_inserter(inputs_)); - std::sort(inputs_.begin(), inputs_.end()); - - outputs_.reserve(output_set.size()); - std::copy(output_set.begin(), output_set.end(), std::back_inserter(outputs_)); - std::sort(outputs_.begin(), outputs_.end()); - - std::vector tmp_index; - tmp_index.reserve(temp_output.size()); - int output_len = static_cast(outputs_.size()); - for (int i = 0; i < output_len; ++i) { - if (Contains(temp_output, outputs_[i])) { - tmp_index.push_back(i); + for (auto& var_name : opt.second) { + output_set.insert(var_name); + } } } - - attrs_["temporary_index"] = tmp_index; + auto& inputs = inputs_[kAll]; + inputs.reserve(input_set.size()); + std::copy(input_set.begin(), input_set.end(), std::back_inserter(inputs)); + auto& outputs = outputs_[kAll]; + outputs.reserve(output_set.size()); + std::copy(output_set.begin(), output_set.end(), std::back_inserter(outputs)); } std::string NetOp::DebugString() const { @@ -73,5 +67,25 @@ std::string NetOp::DebugString() const { bool NetOp::IsNetOp() const { return true; } +std::vector NetOp::OutputVars(bool has_intermediate) const { + if (has_intermediate) { + return this->outputs_.at(kAll); + } + auto& all = this->outputs_.at(kAll); + std::vector ret_val; + for (auto& each : all) { + if (!Contains(intermediate_outputs_, each)) { + ret_val.push_back(each); + } + } + return ret_val; +} + +NetOp::NetOp(const std::string& type, + const framework::OperatorBase::VarNameMap& inputs, + const framework::OperatorBase::VarNameMap& outputs, + const 
framework::AttributeMap& attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + } // namespace operators } // namespace paddle diff --git a/paddle/operators/net_op.h b/paddle/operators/net_op.h index 24c9e61c66933c6be5bf44b3537e00b70a33922f..4a3408c158a029a96740717280c1562671fa938f 100644 --- a/paddle/operators/net_op.h +++ b/paddle/operators/net_op.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once +#include "paddle/framework/framework.pb.h" #include "paddle/framework/op_registry.h" namespace paddle { @@ -35,7 +36,10 @@ namespace operators { */ class NetOp : public framework::OperatorBase { public: - DEFINE_OPERATOR_CTOR(NetOp, framework::OperatorBase) + static const char kAll[]; + NetOp() : framework::OperatorBase("plain_net", {}, {}, {}) {} + NetOp(const std::string& type, const VarNameMap& inputs, + const VarNameMap& outputs, const framework::AttributeMap& attrs); /** * Infer all the operators' input and output variables' shapes, will be called @@ -92,11 +96,13 @@ class NetOp : public framework::OperatorBase { std::string DebugString() const override; bool IsNetOp() const override; + std::vector OutputVars(bool has_intermediate) const override; std::vector> ops_; private: bool add_op_done_{false}; + std::set intermediate_outputs_; template static bool Contains(T container, KeyType key) { diff --git a/paddle/operators/net_op_test.cc b/paddle/operators/net_op_test.cc index 0d5c3de798d0b580860d24ea9a61a6a4ede5d0ab..f7aa56262ef71c24bf668950f6e9914e5f96ff70 100644 --- a/paddle/operators/net_op_test.cc +++ b/paddle/operators/net_op_test.cc @@ -12,8 +12,7 @@ static int run_cnt = 0; class TestOp : public framework::OperatorBase { public: - DEFINE_OPERATOR_CTOR(TestOp, framework::OperatorBase) - + using framework::OperatorBase::OperatorBase; void InferShape(const Scope& scope) const override { ++infer_shape_cnt; } void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const override { @@ -23,8 +22,7 @@ class TestOp : public framework::OperatorBase { class EmptyOp : public framework::OperatorBase { public: - DEFINE_OPERATOR_CTOR(EmptyOp, framework::OperatorBase) - + using framework::OperatorBase::OperatorBase; void InferShape(const Scope& scope) const override {} void Run(const Scope& scope, const DeviceContext& dev_ctx) const override {} }; @@ -46,40 +44,32 @@ TEST(OpKernel, all) { auto net = std::make_shared(); ASSERT_NE(net, nullptr); - auto op1 = std::make_shared(); - op1->inputs_ = {"x", "w1", "b1"}; - op1->outputs_ = {"y"}; + auto op1 = std::shared_ptr( + new TestOp("test", {{"X", {"x"}}, {"W", {"w1"}}, {"b", {"b1"}}}, + {{"Out", {"y"}}}, {})); net->AddOp(op1); - auto op2 = std::make_shared(); - op2->inputs_ = {"y", "w2", "b2"}; - op2->outputs_ = {"z"}; + auto op2 = std::shared_ptr( + new TestOp("test", {{"X", {"y"}}, {"W", {"w2"}}, {"b", {"b2"}}}, + {{"Out", {"z"}}}, {})); net->AddOp(op2); net->CompleteAddOp(); - AssertSameVectorWithoutOrder({"x", "w1", "b1", "w2", "b2"}, net->inputs_); - AssertSameVectorWithoutOrder({"y", "z"}, net->outputs_); - auto tmp_idx_iter = net->attrs_.find("temporary_index"); - ASSERT_NE(net->attrs_.end(), tmp_idx_iter); - auto& tmp_idx = boost::get>(tmp_idx_iter->second); - ASSERT_EQ(1UL, tmp_idx.size()); - ASSERT_EQ("y", net->outputs_[tmp_idx[0]]); + AssertSameVectorWithoutOrder({"x", "w1", "b1", "w2", "b2"}, + net->inputs_.at(NetOp::kAll)); + AssertSameVectorWithoutOrder({"y", "z"}, net->outputs_.at(NetOp::kAll)); - Scope scope; - platform::CPUDeviceContext dev_ctx; + auto final_outs = net->OutputVars(false); - 
net->InferShape(scope); - net->Run(scope, dev_ctx); - ASSERT_EQ(2, infer_shape_cnt); - ASSERT_EQ(2, run_cnt); - ASSERT_THROW(net->AddOp(op2), platform::EnforceNotMet); + ASSERT_EQ(final_outs.size(), 1UL); + ASSERT_EQ(final_outs[0], "z"); } TEST(NetOp, insert_op) { NetOp net; - auto op1 = std::make_shared(); - op1->inputs_ = {"x", "w1", "b1"}; - op1->outputs_ = {"y"}; + auto op1 = std::shared_ptr( + new EmptyOp("empty", {{"X", {"x"}}, {"W", {"w1"}}, {"b", {"b1"}}}, + {{"Out", {"y"}}}, {})); net.AddOp(op1); net.InsertOp(0, op1); ASSERT_EQ(2UL, net.ops_.size()); diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index 243837420562634c3d99fd0acf234ebd53539735..5ddee75581824996fd312f8ddf13007759fd9a67 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -91,12 +91,17 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope) const { // create step net's temp inputs for (auto& input : net_op->inputs_) { // the weight are located in parent scope - if (!step_scope.FindVar(input)) - step_scope.NewVar(input)->GetMutable(); + for (auto& var_name : input.second) { + if (!step_scope.FindVar(var_name)) { + step_scope.NewVar(var_name)->GetMutable(); + } + } } // create stepnet's outputs for (const auto& output : net_op->outputs_) { - step_scope.NewVar(output); + for (auto& var_name : output.second) { + step_scope.NewVar(var_name); + } } step_scopes->emplace_back(&step_scope); } @@ -130,8 +135,11 @@ const rnn::ArgumentName RecurrentGradientOp::kArgName{ "inlink@grad", "inlink_alias", "outlink_alias", "memories", "pre_memories", "boot_memories@grad"}; -void RecurrentOp::Init() { - OperatorBase::Init(); +RecurrentOp::RecurrentOp(const std::string& type, + const framework::OperatorBase::VarNameMap& inputs, + const framework::OperatorBase::VarNameMap& outputs, + const framework::AttributeMap& attrs) + : OperatorBase(type, inputs, outputs, attrs) { std::unique_ptr arg(new rnn::Argument()); rnn::InitArgument(kArgName, arg.get(), *this); alg_.Init(std::move(arg)); @@ -147,13 +155,13 @@ class RecurrentAlgorithmProtoAndCheckerMaker // inputs and outputs stored in proto AddInput(name.inlinks, "the inputs that need to be segmented for each step.") - .SetMultiple(); + .AsDuplicable(); AddInput(name.boot_memories, "variables to initialize memories.") - .SetMultiple(); + .AsDuplicable(); AddInput(name.step_net, "network shared by all steps."); AddOutput(name.outlinks, "the outputs that need to concated for all steps.") - .SetMultiple(); + .AsDuplicable(); AddOutput(name.step_scopes, "step scopes"); // Attributes stored in AttributeMap @@ -225,8 +233,11 @@ void RecurrentGradientAlgorithm::InferShape(const Scope& scope) const { LinkBootMemoryGradients(step_scopes[0], true /*infer_shape_mode*/); } -void RecurrentGradientOp::Init() { - OperatorBase::Init(); +RecurrentGradientOp::RecurrentGradientOp( + const std::string& type, const framework::OperatorBase::VarNameMap& inputs, + const framework::OperatorBase::VarNameMap& outputs, + const framework::AttributeMap& attrs) + : OperatorBase(type, inputs, outputs, attrs) { std::unique_ptr arg(new rnn::Argument()); rnn::InitArgument(kArgName, arg.get(), *this); alg_.Init(std::move(arg)); diff --git a/paddle/operators/recurrent_op.h b/paddle/operators/recurrent_op.h index fdd9d005378e63b8d44803fb2b4be83d134c6a5b..8f4f2444d844b4ba5948f001a365a7ecaeecc106 100644 --- a/paddle/operators/recurrent_op.h +++ b/paddle/operators/recurrent_op.h @@ -100,13 +100,12 @@ class RecurrentGradientAlgorithm { }; class RecurrentOp 
final : public framework::OperatorBase { - DEFINE_OPERATOR_CTOR(RecurrentOp, framework::OperatorBase) public: - void Init() override; - + RecurrentOp(const std::string& type, const VarNameMap& inputs, + const VarNameMap& outputs, const framework::AttributeMap& attrs); /** - * InferShape must be called before Run. - */ + * InferShape must be called before Run. + */ void InferShape(const framework::Scope& scope) const override { alg_.InferShape(scope); } @@ -124,7 +123,9 @@ class RecurrentOp final : public framework::OperatorBase { class RecurrentGradientOp final : public framework::OperatorBase { public: - void Init() override; + RecurrentGradientOp(const std::string& type, const VarNameMap& inputs, + const VarNameMap& outputs, + const framework::AttributeMap& attrs); /** * InferShape must be called before Run. diff --git a/paddle/operators/recurrent_op_test.cc b/paddle/operators/recurrent_op_test.cc index 0c9a343415835540c7543f15f40c53b78a6a55c4..2f6eff0720847fdfa6443d2fc233e92dac2d0fab 100644 --- a/paddle/operators/recurrent_op_test.cc +++ b/paddle/operators/recurrent_op_test.cc @@ -25,157 +25,7 @@ namespace paddle { namespace operators { -using framework::make_ddim; -using framework::DDim; -using framework::Tensor; -using framework::Variable; -using framework::Scope; -using framework::OpRegistry; - -class RecurrentOpTest : public ::testing::Test { - protected: - virtual void SetUp() override { - CreateGlobalVariables(); - CreateStepNet(); - CreateRNNOp(); - } - - virtual void TearDown() override {} - - void CreateGlobalVariables() { - // create input, and init content - LOG(INFO) << "create global variable x"; - for (auto inlink : std::vector{"x", "x0", "x1", "h"}) { - Variable* x = scope_.NewVar(inlink); - DDim dims = make_ddim(std::vector{ - 10 /*sent size*/, 20 /*batch size*/, 30 /*input dim*/}); - x->GetMutable()->mutable_data(dims, platform::CPUPlace()); - } - // create output alias just for test - for (auto inlink : std::vector{"h@alias"}) { - Variable* x = scope_.NewVar(inlink); - DDim dims = - make_ddim(std::vector{20 /*batch size*/, 30 /*input dim*/}); - x->GetMutable()->mutable_data(dims, platform::CPUPlace()); - } - - LOG(INFO) << "create global variable w"; - Variable* w = scope_.NewVar("rnn/w"); - w->GetMutable()->mutable_data( - make_ddim(std::vector{30, 30}), platform::CPUPlace()); - - for (auto boot : std::vector{"h_boot"}) { - LOG(INFO) << "create global variable " << boot; - Variable* h_boot = scope_.NewVar(boot); - h_boot->GetMutable()->mutable_data( - make_ddim(std::vector{20 /*batch size*/, 30 /*input dim*/}), - platform::CPUPlace()); - } - - LOG(INFO) << "create variable step_scopes"; - scope_.NewVar("step_scopes"); - - LOG(INFO) << "create variable h"; - scope_.NewVar("h"); - } - - void CreateRNNOp() { - framework::OpDesc op_desc; - - op_desc.set_type("recurrent_op"); - // inlinks 0 - op_desc.add_inputs("x"); - op_desc.add_inputs("x0"); - op_desc.add_inputs("x1"); - // boot_memories 3 - op_desc.add_inputs("h_boot"); - // step net 5 - op_desc.add_inputs("step_net"); - // outlinks 6 - op_desc.add_outputs("h"); - // step scopes 7 - op_desc.add_outputs("step_scopes"); - - auto _input_format = std::vector{ - 0, // in_link - 3, // memories - 4 // step_net - }; - auto input_format = op_desc.add_attrs(); - input_format->set_name("input_format"); - input_format->set_type(paddle::framework::AttrType::INTS); - for (auto i : _input_format) { - input_format->add_ints(i); - } - - auto output_format = op_desc.add_attrs(); - output_format->set_name("output_format"); - 
output_format->set_type(paddle::framework::AttrType::INTS); - for (auto i : std::vector{0, 1, 2}) { - output_format->add_ints(i); - } - - auto inlink_alias = op_desc.add_attrs(); - inlink_alias->set_name("inlink_alias"); - inlink_alias->set_type(paddle::framework::AttrType::STRINGS); - - auto outlink_alias = op_desc.add_attrs(); - outlink_alias->set_name("outlink_alias"); - outlink_alias->set_type(paddle::framework::AttrType::STRINGS); - - auto pre_memories = op_desc.add_attrs(); - pre_memories->set_name("pre_memories"); - pre_memories->set_type(paddle::framework::AttrType::STRINGS); - - auto memories = op_desc.add_attrs(); - memories->set_name("memories"); - memories->set_type(paddle::framework::AttrType::STRINGS); - - // create inlink_alias - for (const auto& item : - std::vector{"x@alias", "x0@alias", "x1@alias"}) { - inlink_alias->add_strings(item); - } - // pre memories - for (const auto& item : std::vector{"rnn/h@pre"}) { - pre_memories->add_strings(item); - } - // memories - for (const auto& item : std::vector{"rnn/h"}) { - memories->add_strings(item); - } - // output alias - for (const auto& item : std::vector{"h@alias"}) { - outlink_alias->add_strings(item); - } - - rnn_op_ = OpRegistry::CreateOp(op_desc); - - LOG(INFO) << "rnn_op finish init"; - } - - void CreateStepNet() { - LOG(INFO) << "create variable step_net"; - Variable* var = scope_.NewVar("step_net"); - auto net = var->GetMutable(); - net->AddOp( - OpRegistry::CreateOp("mul", {"rnn/h@pre", "rnn/w"}, {"rnn/s"}, {})); - - net->AddOp( - OpRegistry::CreateOp("add_two", {"x@alias", "rnn/s"}, {"rnn/h"}, {})); - net->CompleteAddOp(); - } - - // father scope - Scope scope_; - std::shared_ptr rnn_op_; -}; - -TEST_F(RecurrentOpTest, Run) { - platform::CPUDeviceContext ctx; - rnn_op_->InferShape(scope_); - rnn_op_->Run(scope_, ctx); -} +using namespace paddle::framework; class RecurrentGradientAlgorithmTest : public ::testing::Test { protected: @@ -281,11 +131,13 @@ class RecurrentGradientAlgorithmTest : public ::testing::Test { LOG(INFO) << "create variable step_net"; Variable* var = scope_.NewVar("step_net"); auto net = var->GetMutable(); - net->AddOp(OpRegistry::CreateOp("mul", {"rnn/h_pre", "rnn/w", "rnn/s_grad"}, - {"rnn/h_pre_grad", "rnn/w_grad"}, {})); + // TODO(qingqing) modify backward op create for RNNOp unit test + // and the unit test will be removed to Python. 
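One hypothetical way the first commented call below could be grouped, shown only to make the intended map-based shape concrete; the actual parameter names for mul's gradient are exactly what the TODO defers to the Python-side test:

.. code-block:: cpp

    // Hypothetical grouping, not the patch's decision.
    net->AddOp(paddle::framework::OpRegistry::CreateOp(
        "mul", {{"X", {"rnn/h_pre"}}, {"Y", {"rnn/w"}}},
        {{"Out", {"rnn/s_grad"}}}, {}));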
+ // net->AddOp(OpRegistry::CreateOp("mul", {"X", {"rnn/h_pre", "rnn/w", + // "rnn/s_grad"}}, {"Y", {"rnn/h_pre_grad", "rnn/w_grad"}}, {})); - net->AddOp(OpRegistry::CreateOp("add_two", {"rnn/h_grad"}, - {"rnn/x_grad", "rnn/s_grad"}, {})); + // net->AddOp(OpRegistry::CreateOp("add_two", {"X", {"rnn/h_grad"}}, + // {"Y", {"rnn/x_grad"}}, {"Out", "rnn/s_grad"}}, {})); net->CompleteAddOp(); } @@ -359,7 +211,8 @@ TEST(RecurrentOp, LinkMemories) { memories.push_back(mem_attr); for (size_t i = 1; i < len; ++i) { - rnn::LinkMemories(step_scopes, memories, i, -1, false /*infer_shape_mode*/); + rnn::LinkMemories(step_scopes, memories, i, -1, false + /*infer_shape_mode*/); } // check for (size_t i = 0; i < len - 1; ++i) { @@ -375,7 +228,8 @@ TEST(RecurrentOp, LinkMemories) { } for (int i = len - 2; i >= 0; --i) { - rnn::LinkMemories(step_scopes, memories, i, 1, false /*infer_shape_mode*/); + rnn::LinkMemories(step_scopes, memories, i, 1, false + /*infer_shape_mode*/); } // check for (int i = len - 2; i >= 0; --i) { diff --git a/paddle/operators/rowwise_add_op.cc b/paddle/operators/rowwise_add_op.cc index 402f6340a04d9b423bb16431a99a2f2866d203bc..b4671c293af1c4fed3b441f05bc8f3a5db039b41 100644 --- a/paddle/operators/rowwise_add_op.cc +++ b/paddle/operators/rowwise_add_op.cc @@ -18,19 +18,19 @@ namespace paddle { namespace operators { class RowWiseAddOp : public framework::OperatorWithKernel { - DEFINE_OPERATOR_CTOR(RowWiseAddOp, framework::OperatorWithKernel) + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE(ctx.InputSize() == 2UL, - "Two inputs is needed by rowwise add"); - auto dim0 = ctx.Input(0)->dims(); - auto dim1 = ctx.Input(1)->dims(); + auto dim0 = ctx.Input("X")->dims(); + auto dim1 = ctx.Input("b")->dims(); PADDLE_ENFORCE(dim0.size() == 2, "Input 0 must be matrix"); PADDLE_ENFORCE(dim1.size() == 1, "The second input must be vector"); PADDLE_ENFORCE(dim0[1] == dim1[0], "The width of two input must be same"); - PADDLE_ENFORCE(ctx.OutputSize() == 1, "The output size must be 1"); - ctx.Output(0)->Resize(ctx.Input(0)->dims()); + PADDLE_ENFORCE(ctx.OutputSize("Out") == 1, "The output size must be 1"); + ctx.Output("Out")->Resize(ctx.Input("X")->dims()); } }; diff --git a/paddle/operators/rowwise_add_op.h b/paddle/operators/rowwise_add_op.h index 82e9d70e959441869b958c1241fa5f5beef4c50c..01f88f2198774fbaa4c98ff9bf286f2f08496a9a 100644 --- a/paddle/operators/rowwise_add_op.h +++ b/paddle/operators/rowwise_add_op.h @@ -31,11 +31,11 @@ template class RowWiseAddKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto out = context.Output(0); + auto out = context.Output("Out"); out->mutable_data(context.GetPlace()); - auto input = EigenMatrix::From(*context.Input(0)); - auto bias = EigenVector::From(*context.Input(1)); + auto input = EigenMatrix::From(*context.Input("X")); + auto bias = EigenVector::From(*context.Input("b")); auto output = EigenMatrix::From(*out); const int bias_size = bias.dimension(0); diff --git a/paddle/operators/sgd_op.cc b/paddle/operators/sgd_op.cc index 5b8093f0f77e0982a7ad25b42b299a6461712630..bf76df272b6faaed01ed8d715fe3b547ec7dc4e3 100644 --- a/paddle/operators/sgd_op.cc +++ b/paddle/operators/sgd_op.cc @@ -18,17 +18,15 @@ namespace paddle { namespace operators { class SGDOp : public framework::OperatorWithKernel { - DEFINE_OPERATOR_CTOR(SGDOp, framework::OperatorWithKernel) + public: 
+ using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE_EQ(ctx.InputSize(), 2, "Input size of SGDOp must be two"); - PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1, "Output size of SGDOp must be one"); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), "inputs[0] mast be set"); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(1), "inputs[1] mast be set"); - PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar(0), "outputs[0] mast be set"); - PADDLE_ENFORCE(ctx.Input(0)->dims() == ctx.Input(1)->dims(), - "Two input of SGD Op's dimension must be same."); - ctx.Output(0)->Resize(ctx.Input(0)->dims()); + PADDLE_ENFORCE( + ctx.Input("param")->dims() == ctx.Input("grad")->dims(), + "Two input of SGD Op's dimension must be same."); + ctx.Output("param_out")->Resize(ctx.Input("param")->dims()); } }; diff --git a/paddle/operators/sigmoid_op.cc b/paddle/operators/sigmoid_op.cc index a02e2dc39e8f0d3e31c22a5cafeff111d08aa905..a7dfb624e5b779164eb07763eb604c548f6e89e7 100644 --- a/paddle/operators/sigmoid_op.cc +++ b/paddle/operators/sigmoid_op.cc @@ -18,12 +18,12 @@ namespace paddle { namespace operators { class SigmoidOp : public framework::OperatorWithKernel { - DEFINE_OPERATOR_CTOR(SigmoidOp, framework::OperatorWithKernel) + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE(ctx.InputSize() == 1, "Sigmoid Op only have one input"); - PADDLE_ENFORCE(ctx.OutputSize() == 1, "Sigmoid Op only have one output"); - ctx.Output(0)->Resize(ctx.Input(0)->dims()); + ctx.Output("Y")->Resize(ctx.Input("X")->dims()); } }; @@ -39,7 +39,9 @@ class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker { }; class SigmoidOpGrad : public framework::OperatorWithKernel { - DEFINE_OPERATOR_CTOR(SigmoidOpGrad, framework::OperatorWithKernel) + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { ctx.Output(0)->Resize(ctx.Input(0)->dims()); diff --git a/paddle/operators/sigmoid_op.h b/paddle/operators/sigmoid_op.h index 7af879b2091e4a7f80a3a64be029394156650c23..11ab923eb346c1f8de3a6bbebdfa874b6530004a 100644 --- a/paddle/operators/sigmoid_op.h +++ b/paddle/operators/sigmoid_op.h @@ -28,8 +28,8 @@ template class SigmoidKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto input = context.Input(0); - auto output = context.Output(0); + auto input = context.Input("X"); + auto output = context.Output("Y"); output->mutable_data(context.GetPlace()); // The clipping is used in Paddle's raw implenmention diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc index 9b6a679642303a2cb34954ce16b4a5811acf0ec2..5d8ece1a254a58990bfb2f919567fa43689335b9 100644 --- a/paddle/operators/softmax_op.cc +++ b/paddle/operators/softmax_op.cc @@ -18,15 +18,13 @@ namespace paddle { namespace operators { class SoftmaxOp : public framework::OperatorWithKernel { - DEFINE_OPERATOR_CTOR(SoftmaxOp, framework::OperatorWithKernel) + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE_EQ(ctx.InputSize(), 1UL, - "Only one input is need for softmax"); - PADDLE_ENFORCE_EQ(ctx.Input("X")->dims().size(), 2UL, - "The input of softmax op must be matrix"); - 
PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1UL, - "Only one output is need for softmax"); + PADDLE_ENFORCE(ctx.Input("X")->dims().size() == 2UL, + "The input of softmax op must be matrix"); ctx.Output("Y")->Resize(ctx.Input("X")->dims()); } }; @@ -43,14 +41,12 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker { }; class SoftmaxOpGrad : public framework::OperatorWithKernel { - DEFINE_OPERATOR_CTOR(SoftmaxOpGrad, framework::OperatorWithKernel) + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE_EQ(ctx.InputSize(), 3UL, - "Input of SoftmaxOpGrad should be 3, X, Y, YG"); - PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1UL, - "Output of SoftmaxOpGrad should be 1"); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) should not be null"); + PADDLE_ENFORCE(ctx.InputVar("Y") != nullptr, "Input(Y) should not be null"); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Y")), "Input(Y@GRAD) should not be null"); PADDLE_ENFORCE(ctx.Input("Y")->dims() == diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc index ea81ec053f8b9029114f7c98d292a778dc50c3e4..9d668e6085b93bc5a3a06683aa4470f62ae47c02 100644 --- a/paddle/operators/uniform_random_op.cc +++ b/paddle/operators/uniform_random_op.cc @@ -27,7 +27,7 @@ template class CPUUniformRandomKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* tensor = context.Output(0); + auto* tensor = context.Output("Out"); T* data = tensor->mutable_data(context.GetPlace()); unsigned int seed = static_cast(context.op_.GetAttr("seed")); @@ -46,12 +46,14 @@ class CPUUniformRandomKernel : public framework::OpKernel { }; class UniformRandomOp : public framework::OperatorWithKernel { - DEFINE_OPERATOR_CTOR(UniformRandomOp, framework::OperatorWithKernel) + public: + using framework::OperatorWithKernel::OperatorWithKernel; + protected: void InferShape(const framework::InferShapeContext& ctx) const override { PADDLE_ENFORCE(GetAttr("min") < GetAttr("max"), "uniform_random's min must less then max"); - auto* tensor = ctx.Output(0); + auto* tensor = ctx.Output("Out"); auto dims = GetAttr>("dims"); tensor->Resize(framework::make_ddim(dims)); } diff --git a/paddle/operators/uniform_random_op.cu b/paddle/operators/uniform_random_op.cu index b35ebe7b630be72a5856ec1d3cc32bfaf097aa8a..7a243555b6385af690e9632dfa81bf96d70f925d 100644 --- a/paddle/operators/uniform_random_op.cu +++ b/paddle/operators/uniform_random_op.cu @@ -46,7 +46,7 @@ template class GPUUniformRandomKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* tensor = context.Output(0); + auto* tensor = context.Output("Out"); T* data = tensor->mutable_data(context.GetPlace()); unsigned int seed = static_cast(context.op_.GetAttr("seed")); diff --git a/paddle/platform/dynload/cublas.h b/paddle/platform/dynload/cublas.h index aad8097dbb33cbf6c0f2b4b3efb1376fbe96bc74..9d8343c0b5e200b390ccda760f09816959952e9d 100644 --- a/paddle/platform/dynload/cublas.h +++ b/paddle/platform/dynload/cublas.h @@ -62,12 +62,12 @@ extern void *cublas_dso_handle; DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) #define CUBLAS_BLAS_ROUTINE_EACH(__macro) \ - __macro(cublasSgemv); \ - __macro(cublasDgemv); \ - __macro(cublasSgemm); \ - __macro(cublasDgemm); \ - __macro(cublasSgeam); \ - __macro(cublasDgeam); \ + __macro(cublasSgemv_v2); \ + 
__macro(cublasDgemv_v2); \ + __macro(cublasSgemm_v2); \ + __macro(cublasDgemm_v2); \ + __macro(cublasSgeam_v2); \ + __macro(cublasDgeam_v2); \ __macro(cublasCreate_v2); \ __macro(cublasDestroy_v2); \ __macro(cublasSetStream_v2); \ diff --git a/python/paddle/v2/framework/op.py b/python/paddle/v2/framework/op.py index 7fd8b55a5d167294d3270c79f7b64da03443afd3..904de08da4efa4df49cdc1e391e2674608a4e84b 100644 --- a/python/paddle/v2/framework/op.py +++ b/python/paddle/v2/framework/op.py @@ -1,7 +1,5 @@ import paddle.v2.framework.core as core -import paddle.v2.framework.proto.op_proto_pb2 as op_proto_pb2 -import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2 -import paddle.v2.framework.proto.attribute_pb2 as attribute_pb2 +import paddle.v2.framework.proto.framework_pb2 as framework_pb2 def get_all_op_protos(): @@ -12,11 +10,15 @@ def get_all_op_protos(): protostrs = core.get_all_op_protos() ret_values = [] for pbstr in protostrs: - op_proto = op_proto_pb2.OpProto.FromString(str(pbstr)) + op_proto = framework_pb2.OpProto.FromString(str(pbstr)) ret_values.append(op_proto) return ret_values +def is_str(s): + return isinstance(s, str) or isinstance(s, unicode) + + class OpDescCreationMethod(object): """ A Functor object to convert user input(use key word args) to OpDesc based on @@ -27,7 +29,7 @@ class OpDescCreationMethod(object): """ def __init__(self, op_proto): - if not isinstance(op_proto, op_proto_pb2.OpProto): + if not isinstance(op_proto, framework_pb2.OpProto): raise TypeError("Argument should be OpProto") self.__op_proto__ = op_proto @@ -39,26 +41,34 @@ class OpDescCreationMethod(object): """ if len(args) != 0: raise ValueError("Only keyword arguments is supported by Paddle") - op_desc = op_desc_pb2.OpDesc() - - # Inputs - ipts, ipt_format, _ = OpDescCreationMethod.extract_input_or_output( - "input", kwargs, self.__op_proto__.inputs) - op_desc.inputs.extend(ipts) - if ipt_format is not None: - op_desc.attrs.extend([ipt_format]) - - # Outputs - outs, out_format, tmp_index = OpDescCreationMethod.extract_input_or_output( - "output", kwargs, self.__op_proto__.outputs) - op_desc.outputs.extend(outs) - if out_format is not None: - op_desc.attrs.extend([out_format]) - if len(tmp_index) != 0: - tmp_index_attr = op_desc.attrs.add() - tmp_index_attr.type = attribute_pb2.INTS - tmp_index_attr.name = "temporary_index" - tmp_index_attr.ints.extend(tmp_index) + op_desc = framework_pb2.OpDesc() + + for input_parameter in self.__op_proto__.inputs: + input_arguments = kwargs.get(input_parameter.name, []) + if is_str(input_arguments): + input_arguments = [input_arguments] + + if not input_parameter.duplicable and len(input_arguments) > 1: + raise ValueError("Input %s only accepts one input, but give %d" + % (input_parameter.name, len(input_arguments))) + + ipt = op_desc.inputs.add() + ipt.parameter = input_parameter.name + ipt.arguments.extend(input_arguments) + + for output_parameter in self.__op_proto__.outputs: + output_arguments = kwargs.get(output_parameter.name, []) + if is_str(output_arguments): + output_arguments = [output_arguments] + + if not output_parameter.duplicable and len(output_arguments) > 1: + raise ValueError( + "Output %s only accepts one output, but give %d" % + (output_parameter.name, len(output_arguments))) + + out = op_desc.outputs.add() + out.parameter = output_parameter.name + out.arguments.extend(output_arguments) # Types op_desc.type = self.__op_proto__.type @@ -72,17 +82,17 @@ class OpDescCreationMethod(object): new_attr = op_desc.attrs.add() new_attr.name = attr.name 
new_attr.type = attr.type - if attr.type == attribute_pb2.INT: + if attr.type == framework_pb2.INT: new_attr.i = user_defined_attr - elif attr.type == attribute_pb2.FLOAT: + elif attr.type == framework_pb2.FLOAT: new_attr.f = user_defined_attr - elif attr.type == attribute_pb2.STRING: + elif attr.type == framework_pb2.STRING: new_attr.s = user_defined_attr - elif attr.type == attribute_pb2.INTS: + elif attr.type == framework_pb2.INTS: new_attr.ints.extend(user_defined_attr) - elif attr.type == attribute_pb2.FLOATS: + elif attr.type == framework_pb2.FLOATS: new_attr.floats.extend(user_defined_attr) - elif attr.type == attribute_pb2.STRINGS: + elif attr.type == framework_pb2.STRINGS: new_attr.strings.extend(user_defined_attr) else: raise NotImplementedError("Not support attribute type " + @@ -90,50 +100,6 @@ class OpDescCreationMethod(object): return op_desc - @staticmethod - def extract_input_or_output(in_out, kwargs, meta): - """ - Extract input variable names or output variable names from key-word - arguments, which base on VarProtos. - - :param in_out: "input" or "output" - :param kwargs: key-word arguments that user inputted. - :param meta: a list of VarProto - :return: The three object will be return. The variable names. The - input_format or output_format attribute(None if the input or output is - not multiple). The temporary variable index list. - """ - multiple = OpDescCreationMethod.any_is_true((m.multiple for m in meta)) - tmp_index = [] - retv = [] - if multiple: - var_format = op_desc_pb2.AttrDesc() - var_format.type = attribute_pb2.INTS - var_format.name = "%s_format" % in_out - var_format.ints.append(0) - - for var in meta: - var_name = var.name - - if var.temporary: - var_name = [core.var_names.temp()] - tmp_index.append(len(retv)) - else: - var_name = kwargs.get(var_name, []) - if not isinstance(var_name, list): - var_name = [var_name] - retv.extend(var_name) - var_format.ints.append(len(var_name) + var_format.ints[-1]) - return retv, var_format, tmp_index - else: - for var in meta: - if var.temporary: - retv.append(kwargs.get(var.name, core.var_names.temp())) - tmp_index.append(len(retv)) - else: - retv.append(kwargs.get(var.name, core.var_names.empty())) - return retv, None, tmp_index - @staticmethod def any_is_true(generator): """ @@ -146,13 +112,12 @@ class OpDescCreationMethod(object): class OpInfo(object): - def __init__(self, name, method, inputs, outputs, attrs, no_temp_outputs): + def __init__(self, name, method, inputs, outputs, attrs): self.name = name self.method = method self.inputs = inputs self.outputs = outputs self.attrs = attrs - self.no_temp_outputs = no_temp_outputs def create_op_creation_method(op_proto): @@ -170,10 +135,7 @@ def create_op_creation_method(op_proto): name=op_proto.type, inputs=[var.name for var in op_proto.inputs], outputs=[var.name for var in op_proto.outputs], - attrs=[attr.name for attr in op_proto.attrs], - no_temp_outputs=[ - var.name for var in op_proto.outputs if not var.temporary - ]) + attrs=[attr.name for attr in op_proto.attrs]) class OperatorFactory(object): @@ -214,8 +176,5 @@ class OperatorFactory(object): def get_op_attr_names(self, type): return self.get_op_info(type).attrs - def get_op_no_temp_output_names(self, type): - return self.get_op_info(type).no_temp_outputs - Operator = OperatorFactory() # Default global factory diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index b76c05dc8142af40d9872b42cc51b3c317e095be..96fad9b42e04a88fdcbda093683b57451b2a3e41 
100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -24,3 +24,4 @@ py_test(test_default_scope_funcs SRCS test_default_scope_funcs.py) py_test(test_operator SRCS test_operator.py) # py_test(test_gaussian_random_op SRCS test_gaussian_random_op.py) py_test(test_uniform_random_op SRCS test_uniform_random_op.py) +py_test(test_recurrent_op SRCS test_recurrent_op.py) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index 015e832e82560bb8b3518cbdf605c705d77cdd99..501cf6110ff745b8a6022b463bc9cc3a70145c60 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -53,15 +53,18 @@ def get_numeric_gradient(op, tensor.set(input_values[var_name], core.CPUPlace()) # Create all output variable in local_scope - for output in op.outputs(): - if local_scope.find_var(output) is None: - local_scope.new_var(output).get_tensor() - + opts = op.outputs() + for key in opts: + for output in opts[key]: + if local_scope.find_var(output) is None: + local_scope.new_var(output).get_tensor() op.infer_shape(local_scope) # allocate output memory - for output in op.outputs(): - local_scope.find_var(output).get_tensor().alloc_float(core.CPUPlace()) + for key in opts: + for output in opts[key]: + local_scope.find_var(output).get_tensor().alloc_float(core.CPUPlace( + )) # TODO(yuyang18): Only CPU is support now. cpu_ctx = core.DeviceContext.create(core.CPUPlace()) @@ -150,19 +153,24 @@ class GradientChecker(unittest.TestCase): if no_grad_set is None: no_grad_set = set() - tmp_outs = forward_op.temp_outputs() - no_tmp_out = filter(lambda name: name not in tmp_outs, - forward_op.outputs()) + no_tmp_out = forward_op.no_intermediate_outputs() if len(no_tmp_out) != 1: raise ValueError("non temp out_names should be 1") - in_names = forward_op.inputs() + inputs = forward_op.inputs() + in_names = [item for k in inputs for item in inputs[k]] + outputs = forward_op.outputs() + out_names = [item for k in outputs for item in outputs[k]] + for no_grad in no_grad_set: if no_grad not in in_names: raise ValueError("no_grad should be in in_names") backward_op = core.Operator.backward(forward_op, no_grad_set) + bwd_outputs = backward_op.outputs() + bwd_out_names = [item for k in bwd_outputs for item in bwd_outputs[k]] + places = [core.CPUPlace()] if not only_cpu and core.is_compile_gpu() and backward_op.support_gpu(): places.append(core.GPUPlace(0)) @@ -188,7 +196,7 @@ class GradientChecker(unittest.TestCase): var.set(value, place) # create output var - for out_name in forward_op.outputs(): + for out_name in out_names: scope.new_var(out_name).get_tensor() # infer the shape of output var and compute/set value of output var @@ -198,7 +206,7 @@ class GradientChecker(unittest.TestCase): # create output grad var # set shape as the output var # set value of this grad to ones - for name in forward_op.outputs(): + for name in out_names: out_tensor = scope.find_var(name).get_tensor() grad_tensor = scope.new_var(grad_var_name(name)).get_tensor() grad_tensor.set_dims(out_tensor.shape()) @@ -206,7 +214,7 @@ class GradientChecker(unittest.TestCase): grad_tensor.set(data, place) # create input grad var - for name in backward_op.outputs(): + for name in bwd_out_names: scope.new_var(name).get_tensor() # infer the shape of input gradient var and compute/set it's value diff --git a/python/paddle/v2/framework/tests/test_add_two_op.py 
b/python/paddle/v2/framework/tests/test_add_two_op.py index c0237830647371e14b755953345965a3eac7bfd2..0def484eddb88604398ee10390d3f28058714a57 100644 --- a/python/paddle/v2/framework/tests/test_add_two_op.py +++ b/python/paddle/v2/framework/tests/test_add_two_op.py @@ -19,14 +19,5 @@ class TestAddOp(unittest.TestCase): self.outputs = {'Out': self.inputs['X'] + self.inputs['Y']} -class TestAddGradOp(unittest.TestCase): - def test_add_grad(self): - op = Operator('add_two', X="X", Y="Y", Out="Out") - backward_op = core.Operator.backward(op, set()) - self.assertEqual(backward_op.type(), "add_two_grad") - expected = '''Op(add_two_grad), inputs:(X, Y, Out, Out@GRAD), outputs:(X@GRAD, Y@GRAD).''' - self.assertEqual(expected, str(backward_op)) - - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/framework/tests/test_net.py b/python/paddle/v2/framework/tests/test_net.py index cc7f09e7155f5b1afa47fc4133b71ae3676b7436..b42cadd11ab75abbc35763c8d12e8c27e995f0dc 100644 --- a/python/paddle/v2/framework/tests/test_net.py +++ b/python/paddle/v2/framework/tests/test_net.py @@ -25,12 +25,12 @@ class TestNet(unittest.TestCase): net.complete_add_op(True) expected = ''' -Op(plain_net), inputs:(W, X, Y), outputs:(Out, fc.out, pre_activation). - Op(add_two), inputs:(X, Y), outputs:(Out). - Op(plain_net), inputs:(W, X), outputs:(fc.out, pre_activation). - Op(plain_net), inputs:(W, X), outputs:(fc.out, pre_activation). - Op(mul), inputs:(X, W), outputs:(pre_activation). - Op(sigmoid), inputs:(pre_activation), outputs:(fc.out). +Op(plain_net), inputs:{all[W, X, Y]}, outputs:{all[Out, fc.out, pre_activation]}. + Op(add_two), inputs:{X[X], Y[Y]}, outputs:{Out[Out]}. + Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}. + Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}. + Op(mul), inputs:{X[X], Y[W]}, outputs:{Out[pre_activation]}. + Op(sigmoid), inputs:{X[pre_activation]}, outputs:{Y[fc.out]}. 
diff --git a/python/paddle/v2/framework/tests/test_operator.py b/python/paddle/v2/framework/tests/test_operator.py
index 4f164e1a69e3fd0409f9b575a8bd9b4e423b486b..1abc4eeb57bcedc81e34b0e156048ee4f5cfdc2d 100644
--- a/python/paddle/v2/framework/tests/test_operator.py
+++ b/python/paddle/v2/framework/tests/test_operator.py
@@ -1,9 +1,7 @@
 import unittest
 import paddle.v2.framework.op as op
 import paddle.v2.framework.core as core
-import paddle.v2.framework.proto.op_proto_pb2 as op_proto_pb2
-import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2
-import paddle.v2.framework.proto.attribute_pb2 as attribute_pb2
+import paddle.v2.framework.proto.framework_pb2 as framework_pb2
 
 
 class TestGetAllProtos(unittest.TestCase):
@@ -17,7 +15,7 @@ class TestGetAllProtos(unittest.TestCase):
 
 class TestOpDescCreationMethod(unittest.TestCase):
     def test_plain_input_output(self):
-        op_proto = op_proto_pb2.OpProto()
+        op_proto = framework_pb2.OpProto()
         op_proto.type = "test"
         ipt = op_proto.inputs.add()
         ipt.name = "X"
@@ -37,25 +35,32 @@ class TestOpDescCreationMethod(unittest.TestCase):
 
         method = op.OpDescCreationMethod(op_proto)
         output = method(X="a", Y="b", Z="c")
-
-        expected = op_desc_pb2.OpDesc()
+        expected = framework_pb2.OpDesc()
         expected.type = "test"
-        expected.inputs.extend(["a", "b"])
-        expected.outputs.append("c")
+        ipt_0 = expected.inputs.add()
+        ipt_0.parameter = "X"
+        ipt_0.arguments.extend(["a"])
+        ipt_1 = expected.inputs.add()
+        ipt_1.parameter = 'Y'
+        ipt_1.arguments.extend(['b'])
+        opt = expected.outputs.add()
+        opt.parameter = "Z"
+        opt.arguments.extend(["c"])
+
         self.assertEqual(expected, output)
 
     def test_multiple_input_plain_output(self):
-        op_proto = op_proto_pb2.OpProto()
+        op_proto = framework_pb2.OpProto()
         op_proto.type = "fc"
         ipt = op_proto.inputs.add()
         ipt.name = "X"
         ipt.comment = ""
-        ipt.multiple = True
+        ipt.duplicable = True
 
         ipt = op_proto.inputs.add()
         ipt.name = "W"
         ipt.comment = ""
-        ipt.multiple = True
+        ipt.duplicable = True
 
         ipt = op_proto.inputs.add()
         ipt.name = "b"
@@ -70,30 +75,50 @@ class TestOpDescCreationMethod(unittest.TestCase):
 
         method = op.OpDescCreationMethod(op_proto)
         generated1 = method(X="x", W="w", b="b", Y="y")
-        expected1 = op_desc_pb2.OpDesc()
-        expected1.inputs.extend(['x', 'w', 'b'])
-        expected1.outputs.extend(['y'])
+        expected1 = framework_pb2.OpDesc()
+        tmp = expected1.inputs.add()
+        tmp.parameter = "X"
+        tmp.arguments.extend(['x'])
+
+        tmp = expected1.inputs.add()
+        tmp.parameter = 'W'
+        tmp.arguments.extend(['w'])
+
+        tmp = expected1.inputs.add()
+        tmp.parameter = 'b'
+        tmp.arguments.extend(['b'])
+
+        tmp = expected1.outputs.add()
+        tmp.parameter = 'Y'
+        tmp.arguments.extend(['y'])
         expected1.type = 'fc'
-        attr = expected1.attrs.add()
-        attr.name = 'input_format'
-        attr.type = attribute_pb2.INTS
-        attr.ints.extend([0, 1, 2, 3])
         self.assertEqual(expected1, generated1)
 
         generated2 = method(
             X=['x1', 'x2', 'x3'], b='b', W=['w1', 'w2', 'w3'], Y='y')
-        expected2 = op_desc_pb2.OpDesc()
-        expected2.inputs.extend(['x1', 'x2', 'x3', 'w1', 'w2', 'w3', 'b'])
-        expected2.outputs.extend(['y'])
+        expected2 = framework_pb2.OpDesc()
+
+        tmp = expected2.inputs.add()
+        tmp.parameter = "X"
+        tmp.arguments.extend(['x1', 'x2', 'x3'])
+
+        tmp = expected2.inputs.add()
+        tmp.parameter = 'W'
+        tmp.arguments.extend(['w1', 'w2', 'w3'])
+
+        tmp = expected2.inputs.add()
+        tmp.parameter = 'b'
+        tmp.arguments.extend(['b'])
+
+        tmp = expected2.outputs.add()
+        tmp.parameter = 'Y'
+        tmp.arguments.extend(['y'])
+
         expected2.type = 'fc'
-        attr = expected2.attrs.add()
-        attr.name = 'input_format'
-        attr.type = attribute_pb2.INTS
-        attr.ints.extend([0, 3, 6, 7])
         self.assertEqual(expected2, generated2)
 
     def test_attrs(self):
-        op_proto = op_proto_pb2.OpProto()
+        op_proto = framework_pb2.OpProto()
        op_proto.type = "test"
         ipt = op_proto.inputs.add()
         ipt.name = 'X'
@@ -105,12 +130,12 @@ class TestOpDescCreationMethod(unittest.TestCase):
             attr.comment = ""
             attr.type = type
 
-        __add_attr__("int_attr", attribute_pb2.INT)
-        __add_attr__("float_attr", attribute_pb2.FLOAT)
-        __add_attr__("string_attr", attribute_pb2.STRING)
-        __add_attr__("ints_attr", attribute_pb2.INTS)
-        __add_attr__("floats_attr", attribute_pb2.FLOATS)
-        __add_attr__("strings_attr", attribute_pb2.STRINGS)
+        __add_attr__("int_attr", framework_pb2.INT)
+        __add_attr__("float_attr", framework_pb2.FLOAT)
+        __add_attr__("string_attr", framework_pb2.STRING)
+        __add_attr__("ints_attr", framework_pb2.INTS)
+        __add_attr__("floats_attr", framework_pb2.FLOATS)
+        __add_attr__("strings_attr", framework_pb2.STRINGS)
 
         op_proto.comment = ""
         self.assertTrue(op_proto.IsInitialized())
@@ -126,76 +151,52 @@ class TestOpDescCreationMethod(unittest.TestCase):
             floats_attr=[0.2, 3.2, 4.5],
             strings_attr=["a", "b", "c"])
 
-        expected = op_desc_pb2.OpDesc()
+        expected = framework_pb2.OpDesc()
         expected.type = "test"
-        expected.inputs.extend(['a'])
+
+        ipt = expected.inputs.add()
+        ipt.parameter = "X"
+        ipt.arguments.extend(['a'])
+
         attr = expected.attrs.add()
         attr.name = "int_attr"
-        attr.type = attribute_pb2.INT
+        attr.type = framework_pb2.INT
         attr.i = 10
 
         attr = expected.attrs.add()
         attr.name = "float_attr"
-        attr.type = attribute_pb2.FLOAT
+        attr.type = framework_pb2.FLOAT
         attr.f = 3.2
 
         attr = expected.attrs.add()
         attr.name = "string_attr"
-        attr.type = attribute_pb2.STRING
+        attr.type = framework_pb2.STRING
         attr.s = "test_str"
 
         attr = expected.attrs.add()
         attr.name = "ints_attr"
-        attr.type = attribute_pb2.INTS
+        attr.type = framework_pb2.INTS
         attr.ints.extend([0, 1, 2, 3, 4])
 
         attr = expected.attrs.add()
         attr.name = 'floats_attr'
-        attr.type = attribute_pb2.FLOATS
+        attr.type = framework_pb2.FLOATS
         attr.floats.extend([0.2, 3.2, 4.5])
 
         attr = expected.attrs.add()
         attr.name = 'strings_attr'
-        attr.type = attribute_pb2.STRINGS
+        attr.type = framework_pb2.STRINGS
         attr.strings.extend(['a', 'b', 'c'])
 
         self.assertEqual(expected, generated)
 
-    def test_input_temporary_output(self):
-        op_proto = op_proto_pb2.OpProto()
-        op_proto.type = "test"
-        out = op_proto.outputs.add()
-        out.name = "OUT"
-        out.comment = ""
-
-        out = op_proto.outputs.add()
-        out.name = "TMP"
-        out.comment = ""
-        out.temporary = True
-
-        out = op_proto.outputs.add()
-        out.name = "OUT2"
-        out.comment = ""
-        op_proto.comment = ""
-
-        method = op.OpDescCreationMethod(op_proto)
-        generated = method(OUT="a", OUT2="b")
-        desc = op_desc_pb2.OpDesc()
-        desc.outputs.extend(["a", core.var_names.temp(), "b"])
-        desc.type = "test"
-        attr = desc.attrs.add()
-        attr.name = "temporary_index"
-        attr.type = attribute_pb2.INTS
-        attr.ints.append(2)
-        self.assertEqual(generated, desc)
-
 
 class TestOpCreations(unittest.TestCase):
     def test_all(self):
         add_op = op.Operator("add_two", X="a", Y="b", Out="z")
         self.assertIsNotNone(add_op)
         # Invoke C++ DebugString()
-        self.assertEqual('Op(add_two), inputs:(a, b), outputs:(z).',
+        self.assertEqual('Op(add_two), inputs:{X[a], Y[b]}, outputs:{Out[z]}.',
                          str(add_op))
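The test changes above encode the new OpDesc layout: inputs and outputs are no longer flat name lists patched up with input_format/temporary_index attributes; each entry is a (parameter, arguments) pair, so a duplicable parameter simply carries all of its argument names. A minimal sketch, assuming the framework_pb2 module generated from framework.proto (the argument names are invented):

    import paddle.v2.framework.proto.framework_pb2 as framework_pb2

    desc = framework_pb2.OpDesc()
    desc.type = "fc"
    # A duplicable input keeps every bound name under one parameter, so no
    # separate 'input_format' index attribute is needed any more.
    ipt = desc.inputs.add()
    ipt.parameter = "X"
    ipt.arguments.extend(["x1", "x2", "x3"])
    opt = desc.outputs.add()
    opt.parameter = "Y"
    opt.arguments.extend(["y"])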
diff --git a/python/paddle/v2/framework/tests/test_protobuf.py b/python/paddle/v2/framework/tests/test_protobuf.py
index 69e98e2f250a9df23b25e7e2043af29f87c996a0..848a396b3b6eec57d500b464780b64f339b09e94 100644
--- a/python/paddle/v2/framework/tests/test_protobuf.py
+++ b/python/paddle/v2/framework/tests/test_protobuf.py
@@ -1,11 +1,10 @@
-import paddle.v2.framework.proto.op_proto_pb2 as op_proto_lib
-import paddle.v2.framework.proto.attribute_pb2 as attr_type_lib
+import paddle.v2.framework.proto.framework_pb2 as framework_pb2
 import unittest
 
 
 class TestFrameworkProto(unittest.TestCase):
     def test_all(self):
-        op_proto = op_proto_lib.OpProto()
+        op_proto = framework_pb2.OpProto()
         ipt0 = op_proto.inputs.add()
         ipt0.name = "a"
         ipt0.comment = "the input of cosine op"
@@ -19,7 +18,7 @@ class TestFrameworkProto(unittest.TestCase):
         attr = op_proto.attrs.add()
         attr.name = "scale"
         attr.comment = "scale of cosine op"
-        attr.type = attr_type_lib.FLOAT
+        attr.type = framework_pb2.FLOAT
         op_proto.type = "cos"
         self.assertTrue(op_proto.IsInitialized())
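The attribute type constants now also live in framework_pb2, and each AttrType tag pairs with one value field on the attr message: INT with i, FLOAT with f, STRING with s, INTS with ints, FLOATS with floats, STRINGS with strings, as exercised in the test_operator.py change above. A small sketch of that pairing:

    import paddle.v2.framework.proto.framework_pb2 as framework_pb2

    desc = framework_pb2.OpDesc()
    desc.type = "cos"
    attr = desc.attrs.add()
    attr.name = "scale"
    attr.type = framework_pb2.FLOAT  # the FLOAT tag pairs with the .f field
    attr.f = 3.2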
diff --git a/python/paddle/v2/framework/tests/test_recurrent_op.py b/python/paddle/v2/framework/tests/test_recurrent_op.py
index 5c77c477b347f4713e4af2a8cb462b243d7a779c..0db66cc4e181fde10f161a323ea749fd84a5f963 100644
--- a/python/paddle/v2/framework/tests/test_recurrent_op.py
+++ b/python/paddle/v2/framework/tests/test_recurrent_op.py
@@ -2,19 +2,74 @@ import logging
 import paddle.v2.framework.core as core
 import unittest
 import numpy as np
-import paddle.v2.framework.create_op_creation_methods as creation
+from paddle.v2.framework.op import Operator
 
-ops = creation.op_creations
+def py_sigmoid(x):
+    return 1. / (1. + np.exp(-x))
 
-def create_tensor(scope, name, shape):
+
+class PySimpleRNN(object):
+    '''
+    A simple implementation of RNN based on numpy, to further test
+    RecurrentOp's algorithm.
+    '''
+
+    def __init__(self, input_dim=30, batch_size=50, weight_dim=15, sent_len=11):
+        self.x = np.random.normal(size=(sent_len, batch_size, input_dim))
+        self.W = np.random.normal(size=(input_dim, input_dim))
+        self.U = np.random.normal(size=(input_dim, input_dim))
+        self.h_boot = np.random.normal(size=(batch_size, input_dim))
+
+        # memories
+        self.mems = [
+            np.zeros(shape=(batch_size, input_dim)) for i in range(sent_len)
+        ]
+
+    def forward(self):
+        xs = self.segment_inputs()
+        for step_id in range(self.x.shape[0]):
+            self.step(step_id, xs[step_id])
+        return self.concat_outputs()
+
+    def segment_inputs(self):
+        return [self.x[i] for i in range(self.x.shape[0])]
+
+    def concat_outputs(self):
+        return np.array(self.mems)
+
+    def step(self, step_id, x):
+        '''
+        run a step
+        '''
+        if step_id > 0:
+            pre_mem = self.mems[step_id - 1]
+        else:
+            pre_mem = self.h_boot
+        xW = np.matmul(x, self.W)
+        hU = np.matmul(pre_mem, self.U)
+
+        sum = xW + hU
+        self.mems[step_id] = py_sigmoid(sum)
+
+
+class PySimpleRNNTest(unittest.TestCase):
+    def setUp(self):
+        self.rnn = PySimpleRNN()
+
+    def test_forward(self):
+        output = self.rnn.forward()
+        print 'output', output
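PySimpleRNN above is the numpy reference the C++ RecurrentOp output is checked against; each step computes h_t = sigmoid(x_t.W + h_{t-1}.U), with h_boot standing in for the previous memory at step 0. A standalone one-step sketch with tiny made-up shapes:

    import numpy as np

    batch_size, input_dim = 2, 3  # illustrative shapes, not the test's
    x_t = np.random.normal(size=(batch_size, input_dim))
    W = np.random.normal(size=(input_dim, input_dim))
    U = np.random.normal(size=(input_dim, input_dim))
    h_prev = np.zeros((batch_size, input_dim))  # h_boot plays this role at step 0
    # One recurrence step: h_t = sigmoid(x_t.W + h_prev.U)
    h_t = 1. / (1. + np.exp(-(np.matmul(x_t, W) + np.matmul(h_prev, U))))
    assert h_t.shape == (batch_size, input_dim)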
+
+
+def create_tensor(scope, name, shape, np_data):
     tensor = scope.new_var(name).get_tensor()
     tensor.set_dims(shape)
-    tensor.set(np.random.random(shape), core.CPUPlace())
+    tensor.set(np_data, core.CPUPlace())
     return tensor
 
 
-class TestRNN(unittest.TestCase):
+class TestRecurrentOp(unittest.TestCase):
     '''
     Test RNNOp
 
@@ -28,7 +83,7 @@ class TestRNN(unittest.TestCase):
     memories:
         - h
     outputs:
-       - h
+        - h
     '''
 
     input_dim = 30
@@ -36,33 +91,45 @@
     weight_dim = 15
     sent_len = 11
 
-    def init(self):
+    def setUp(self):
+        self.py_rnn = PySimpleRNN(self.input_dim, self.batch_size,
+                                  self.weight_dim, self.sent_len)
 
+    def forward(self):
         self.scope = core.Scope()
-
         self.create_global_variables()
         self.create_step_net()
         rnn_op = self.create_rnn_op()
         ctx = core.DeviceContext.create(core.CPUPlace())
-        print 'infer_shape'
         rnn_op.infer_shape(self.scope)
-
         rnn_op.run(self.scope, ctx)
+        return np.array(self.scope.find_var("h").get_tensor())
 
     def create_global_variables(self):
         # create inlink
+        x_np_data = self.py_rnn.x
         create_tensor(self.scope, "x",
-                      [self.sent_len, self.batch_size, self.input_dim])
-        create_tensor(self.scope, "W", [self.input_dim, self.input_dim])
-        create_tensor(self.scope, "U", [self.input_dim, self.input_dim])
-        create_tensor(self.scope, "h_boot", [self.batch_size, self.input_dim])
+                      [self.sent_len, self.batch_size, self.input_dim],
+                      x_np_data)
+        W_np_data = self.py_rnn.W
+        create_tensor(self.scope, "W", [self.input_dim, self.input_dim],
+                      W_np_data)
+
+        U_np_data = self.py_rnn.U
+        create_tensor(self.scope, "U", [self.input_dim, self.input_dim],
+                      U_np_data)
+
+        h_boot_np_data = self.py_rnn.h_boot
+        create_tensor(self.scope, "h_boot", [self.batch_size, self.input_dim],
+                      h_boot_np_data)
         self.scope.new_var("step_scopes")
         self.scope.new_var("h@alias")
         self.scope.new_var("h")
 
     def create_rnn_op(self):
         # create RNNOp
-        rnnop = ops.recurrent_op(
+        rnnop = Operator(
+            "recurrent_op",
             # inputs
             inlinks=["x"],
             boot_memories=["h_boot"],
@@ -81,17 +148,25 @@
         var = self.scope.new_var("stepnet")
         stepnet = var.get_net()
 
-        x_fc_op = ops.fc(X="x@alias", W="W", Y="Wx")
-        h_fc_op = ops.fc(X="h@pre", W="U", Y="Uh")
-        sum_op = ops.add_two(X="Wx", Y="Uh", Out="sum")
-        sig_op = ops.sigmoid(X="sum", Y="h@alias")
+        # x_fc_op = Operator("fc", X="x@alias", W="W", Y="Wx")
+        # h_fc_op = Operator("fc", X="h@pre", W="U", Y="Uh")
+        x_fc_op = Operator("mul", X="x@alias", Y="W", Out="Wx")
+        h_fc_op = Operator("mul", X="h@pre", Y="U", Out="Uh")
+        sum_op = Operator("add_two", X="Wx", Y="Uh", Out="sum")
+        sig_op = Operator("sigmoid", X="sum", Y="h@alias")
 
         for op in [x_fc_op, h_fc_op, sum_op, sig_op]:
             stepnet.add_op(op)
         stepnet.complete_add_op(True)
 
-    def test_recurrent(self):
-        self.init()
+    def test_forward(self):
+        print 'test recurrent op forward'
+        pd_output = self.forward()
+        py_output = self.py_rnn.forward()
+        print 'pd_output', pd_output
+        print
+        print 'py_output', py_output
+        self.assertEqual(pd_output.shape, py_output.shape)
 
 
 if __name__ == '__main__':