diff --git a/CMakeLists.txt b/CMakeLists.txt index 236363944e1c3e2d3a828ccafced69dae5ab7810..59407f638f1afc595df0de4fc3db1f53718eec05 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,8 +36,8 @@ include(simd) ################################ Configurations ####################################### option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND}) option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND}) -option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." ${AVX_FOUND}) -option(WITH_MKLML "Compile PaddlePaddle with mklml package." ${AVX_FOUND}) +option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." OFF) +option(WITH_MKLML "Compile PaddlePaddle with mklml package." OFF) option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON) option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON) diff --git a/cmake/cpplint.cmake b/cmake/cpplint.cmake index 656e1a0803c6e389d70f37f592c3aa2e95a2bcd4..e50530411cc74392091c8026fa012ec7631f7f6b 100644 --- a/cmake/cpplint.cmake +++ b/cmake/cpplint.cmake @@ -56,11 +56,14 @@ macro(add_style_check_target TARGET_NAME) # cpplint code style get_filename_component(base_filename ${filename} NAME) set(CUR_GEN ${CMAKE_CURRENT_BINARY_DIR}/${base_filename}.cpplint) - add_custom_command(TARGET ${TARGET_NAME} PRE_BUILD + add_custom_command(OUTPUT ${CUR_GEN} PRE_BUILD COMMAND "${PYTHON_EXECUTABLE}" "${PROJ_ROOT}/paddle/scripts/cpplint.py" "--filter=${STYLE_FILTER}" "--write-success=${CUR_GEN}" ${filename} + DEPENDS ${filename} ${PROJ_ROOT}/paddle/scripts/cpplint.py WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + add_custom_target(${base_filename}.cpplint DEPENDS ${CUR_GEN}) + add_dependencies(${TARGET_NAME} ${base_filename}.cpplint) endif() endforeach() endif() diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 534be0abe246ac70950d85ad05441825c8ca768a..41b9b5928958ae31799c396a8d77fd7cff557905 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -187,7 +187,13 @@ function(cc_library TARGET_NAME) endif() # cpplint code style - add_style_check_target(${TARGET_NAME} ${cc_library_SRCS}) + foreach(source_file ${cc_library_SRCS}) + string(REGEX REPLACE "\\.[^.]*$" "" source ${source_file}) + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) + list(APPEND cc_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) + endif() + endforeach() + add_style_check_target(${TARGET_NAME} ${cc_library_SRCS} ${cc_library_HEADERS}) else(cc_library_SRCS) if (cc_library_DEPS) @@ -239,6 +245,14 @@ function(nv_library TARGET_NAME) add_dependencies(${TARGET_NAME} ${nv_library_DEPS}) target_link_libraries(${TARGET_NAME} ${nv_library_DEPS}) endif() + # cpplint code style + foreach(source_file ${nv_library_SRCS}) + string(REGEX REPLACE "\\.[^.]*$" "" source ${source_file}) + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) + list(APPEND cc_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) + endif() + endforeach() + add_style_check_target(${TARGET_NAME} ${nv_library_SRCS} ${nv_library_HEADERS}) else(nv_library_SRCS) if (nv_library_DEPS) merge_static_libs(${TARGET_NAME} ${nv_library_DEPS}) diff --git a/cmake/util.cmake b/cmake/util.cmake index 9790016df98411dcae25f2f3f1aa23f606a8726c..3391527e5ad8d703726d66463b99e1ab340da98f 100644 --- a/cmake/util.cmake +++ b/cmake/util.cmake @@ -118,7 +118,6 @@ endfunction() macro(add_unittest_without_exec TARGET_NAME) add_executable(${TARGET_NAME} ${ARGN}) link_paddle_test(${TARGET_NAME}) - add_style_check_target(${TARGET_NAME} ${ARGN}) endmacro() # add_unittest diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt index 4b06966fba2bc9f92756be0cb8110bbcd5272423..f8a88cf317aee6c5dd25e4cc25d588c6c50fcbce 100644 --- a/paddle/CMakeLists.txt +++ b/paddle/CMakeLists.txt @@ -15,7 +15,6 @@ if(Boost_FOUND) add_subdirectory(platform) add_subdirectory(framework) add_subdirectory(operators) - add_subdirectory(pybind) endif() if(WITH_C_API) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 12a3a00bba35d476fca9c9fb47ac20b87e6f53f2..1db042c6fc8b6c4ea7c3854ea4b1cd016deeb0b6 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -12,13 +12,15 @@ cc_test(variable_test SRCS variable_test.cc) cc_library(scope SRCS scope.cc) cc_test(scope_test SRCS scope_test.cc DEPS scope) -proto_library(attr_type SRCS attr_type.proto) -proto_library(op_proto SRCS op_proto.proto DEPS attr_type) -proto_library(op_desc SRCS op_desc.proto DEPS attr_type) +proto_library(attribute_proto SRCS attribute.proto) +proto_library(op_proto SRCS op_proto.proto DEPS attribute_proto) +proto_library(op_desc SRCS op_desc.proto DEPS attribute_proto) cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf) cc_test(op_desc_test SRCS op_desc_test.cc DEPS op_desc protobuf) -cc_library(operator SRCS operator.cc DEPS op_desc device_context tensor scope) +cc_library(attribute SRCS attribute.cc DEPS op_desc op_proto) + +cc_library(operator SRCS operator.cc DEPS op_desc device_context tensor scope attribute) cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry) cc_library(grad_op_builder SRCS grad_op_builder.cc DEPS op_proto operator) @@ -26,13 +28,19 @@ cc_library(op_registry SRCS op_registry.cc DEPS op_desc grad_op_builder) cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry) cc_test(grad_op_builder_test SRCS grad_op_builder_test.cc DEPS grad_op_builder op_registry add_op) -py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc.proto) +py_proto_compile(framework_py_proto SRCS attribute.proto op_proto.proto op_desc.proto) # Generate an empty __init__.py to make framework_py_proto as a valid python module. add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_dependencies(framework_py_proto framework_py_proto_init) -cc_library(net SRCS net.cc DEPS op_registry) -cc_test(net_op_test SRCS net_op_test.cc DEPS net) - -cc_library(backward SRCS backward.cc DEPS net) +cc_library(backward SRCS backward.cc DEPS net_op) cc_test(backward_test SRCS backward_test.cc DEPS backward) +cc_library(paddle_pybind SHARED + SRCS pybind.cc + DEPS pybind python backward + fc_op + sgd_op + add_op + mean_op + cross_entropy_op + recurrent_op) diff --git a/paddle/framework/attribute.cc b/paddle/framework/attribute.cc new file mode 100644 index 0000000000000000000000000000000000000000..4c5790693b7e48396e945d09f4fdc72b86aa5978 --- /dev/null +++ b/paddle/framework/attribute.cc @@ -0,0 +1,85 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/framework/attribute.h" + +#include + +namespace paddle { +namespace framework { + +template <> +AttrType AttrTypeID() { + return INT; +} +template <> +AttrType AttrTypeID() { + return FLOAT; +} +template <> +AttrType AttrTypeID() { + return STRING; +} +template <> +AttrType AttrTypeID>() { + return INTS; +} +template <> +AttrType AttrTypeID>() { + return FLOATS; +} +template <> +AttrType AttrTypeID>() { + return STRINGS; +} + +Attribute GetAttrValue(const AttrDesc& attr_desc) { + switch (attr_desc.type()) { + case paddle::framework::AttrType::INT: { + return attr_desc.i(); + } + case paddle::framework::AttrType::FLOAT: { + return attr_desc.f(); + } + case paddle::framework::AttrType::STRING: { + return attr_desc.s(); + } + case paddle::framework::AttrType::INTS: { + std::vector val(attr_desc.ints_size()); + for (int i = 0; i < attr_desc.ints_size(); ++i) { + val[i] = attr_desc.ints(i); + } + return val; + } + case paddle::framework::AttrType::FLOATS: { + std::vector val(attr_desc.floats_size()); + for (int i = 0; i < attr_desc.floats_size(); ++i) { + val[i] = attr_desc.floats(i); + } + return val; + } + case paddle::framework::AttrType::STRINGS: { + std::vector val(attr_desc.strings_size()); + for (int i = 0; i < attr_desc.strings_size(); ++i) { + val[i] = attr_desc.strings(i); + } + return val; + } + } + PADDLE_ENFORCE(false, "Unknown OpDesc::AttrDesc::type !"); + return boost::blank(); +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/attr_checker.h b/paddle/framework/attribute.h similarity index 79% rename from paddle/framework/attr_checker.h rename to paddle/framework/attribute.h index ea5614a45f3a77a851358aff80abbc276c9972ba..3a5820e9c60539e3c771df5da4e82f6c1cae688f 100644 --- a/paddle/framework/attr_checker.h +++ b/paddle/framework/attribute.h @@ -1,3 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + #pragma once #include @@ -6,6 +20,9 @@ #include #include #include + +#include "paddle/framework/attribute.pb.h" +#include "paddle/framework/op_desc.pb.h" #include "paddle/platform/enforce.h" namespace paddle { @@ -14,13 +31,19 @@ namespace framework { typedef boost::variant, std::vector, std::vector> Attribute; + typedef std::unordered_map AttributeMap; +template +AttrType AttrTypeID(); + +Attribute GetAttrValue(const AttrDesc& attr_desc); + // check whether a value(attribute) fit a certain limit template class LargerThanChecker { public: - LargerThanChecker(T lower_bound) : lower_bound_(lower_bound) {} + explicit LargerThanChecker(T lower_bound) : lower_bound_(lower_bound) {} void operator()(T& value) const { PADDLE_ENFORCE(value > lower_bound_, "larger_than check fail"); } @@ -35,7 +58,8 @@ class LargerThanChecker { template class DefaultValueSetter { public: - DefaultValueSetter(T default_value) : default_value_(default_value) {} + explicit DefaultValueSetter(T default_value) + : default_value_(default_value) {} void operator()(T& value) const { value = default_value_; } private: @@ -78,7 +102,8 @@ class TypedAttrChecker { typedef std::function ValueChecker; public: - TypedAttrChecker(const std::string& attr_name) : attr_name_(attr_name) {} + explicit TypedAttrChecker(const std::string& attr_name) + : attr_name_(attr_name) {} TypedAttrChecker& InEnum(const std::unordered_set& range) { value_checkers_.push_back(EnumInContainer(range)); diff --git a/paddle/framework/attr_type.proto b/paddle/framework/attribute.proto similarity index 100% rename from paddle/framework/attr_type.proto rename to paddle/framework/attribute.proto diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index e784bb2b7d7ddd087a75371e508840e15c952473..13706f8b562a1d68fe0d603f51c2fb47b4e18164 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -14,8 +14,8 @@ #include "paddle/framework/backward.h" #include -#include "paddle/framework/net.h" #include "paddle/framework/op_registry.h" +#include "paddle/operators/net_op.h" namespace paddle { namespace framework { @@ -32,7 +32,7 @@ static bool AllInSet(const std::vector& names, } static std::shared_ptr NOP() { - auto net_op = std::make_shared(); + auto net_op = std::make_shared(); net_op->type_ = "@NOP@"; net_op->CompleteAddOp(); return net_op; @@ -42,9 +42,9 @@ static std::shared_ptr NOP() { // // no_grad_names the gradient variable names without gradient calculating. // -// uniq_id is a unique index used inside recursively calling BackwardRecursive. -// use `uid = uniq_id++;` to get the unique index, and pass `uniq_id` through -// recursive calling. +// uniq_id is a unique index used inside recursively calling +// BackwardRecursive. use `uid = uniq_id++;` to get the unique index, and +// pass `uniq_id` through recursive calling. // // returns The backward operator. For simple situation, it is a simple // operator. For complex situation, it is a NetOp. @@ -59,32 +59,30 @@ std::shared_ptr BackwardRecursive( // If all input gradients of forwarding operator do not need to calculate, // just return an NOP. Not return null ptr because NOP does not take // too much time for calculation, but it is useful for simplifying logic. - if (AllInSet(forwardOp.inputs_, OperatorBase::GRAD_VAR_SUFFIX(), - no_grad_names)) { + if (AllInSet(forwardOp.inputs_, kGradVarSuffix, no_grad_names)) { return NOP(); } - // All output gradients of forwarding operator do not need to calculate. Then - // all input gradients cannot be computed at all, and we put them into + // All output gradients of forwarding operator do not need to calculate. + // Then all input gradients cannot be computed at all, and we put them into // `no_grad_names` set. Return an NOP. - if (AllInSet(forwardOp.outputs_, OperatorBase::GRAD_VAR_SUFFIX(), - no_grad_names)) { + if (AllInSet(forwardOp.outputs_, kGradVarSuffix, no_grad_names)) { for (auto& name : forwardOp.inputs_) { // Mark all input is not need - no_grad_names.insert(name + OperatorBase::GRAD_VAR_SUFFIX()); + no_grad_names.insert(name + kGradVarSuffix); } return NOP(); } // Returned gradient network - auto net = std::make_shared(); + auto net = std::make_shared(); if (forwardOp.IsNetOp()) { // Because forwardOp is a net op, it can static_cast. - auto& forwardNet = static_cast(forwardOp); + auto& forwardNet = static_cast(forwardOp); - // Map from output gradient variable name to operator's indices in backward - // net. That operator generates that variable. + // Map from output gradient variable name to operator's indices in + // backward net. That operator generates that variable. std::unordered_map> dup_output_ops; size_t local_op_id = 0; @@ -134,9 +132,9 @@ std::shared_ptr BackwardRecursive( std::shared_ptr grad_op = OpRegistry::CreateGradOp(forwardOp); for (std::string& grad_input : grad_op->inputs_) { if (no_grad_names.count(grad_input)) { - std::string prefix = grad_input.substr( - 0, grad_input.size() - OperatorBase::GRAD_VAR_SUFFIX().size()); - grad_input = prefix + OperatorBase::ZERO_VAR_SUFFIX(); + std::string prefix = + grad_input.substr(0, grad_input.size() - kGradVarSuffix.size()); + grad_input = prefix + kZeroVarSuffix; // If part of input gradient of that operator is not calculated, fill // zero variables to that input gradient. @@ -147,7 +145,7 @@ std::shared_ptr BackwardRecursive( for (std::string& grad_output : grad_op->outputs_) { if (no_grad_names.count(grad_output)) { - grad_output = OperatorBase::EMPTY_VAR_NAME(); + grad_output = kEmptyVarName; } } @@ -168,14 +166,14 @@ std::shared_ptr Backward( std::unordered_set no_grad_names; no_grad_names.reserve(no_grad_vars.size()); - no_grad_names.insert(OperatorBase::EMPTY_VAR_NAME() + - OperatorBase::GRAD_VAR_SUFFIX()); + no_grad_names.insert(kEmptyVarName + kGradVarSuffix); for (auto& name : no_grad_vars) { - no_grad_names.insert(name + OperatorBase::GRAD_VAR_SUFFIX()); + no_grad_names.insert(name + kGradVarSuffix); } size_t uid = 0; return BackwardRecursive(forwardOp, no_grad_names, uid); } + } // namespace framework } // namespace paddle diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc index b095c2c3d5dbf21b5ea70e17475a4aaad9b1db44..6c6e12ca254553a8fc02cadbe3a99989ee848943 100644 --- a/paddle/framework/backward_test.cc +++ b/paddle/framework/backward_test.cc @@ -15,8 +15,9 @@ #include "paddle/framework/backward.h" #include -#include "paddle/framework/net.h" #include "paddle/framework/op_registry.h" +#include "paddle/operators/net_op.h" +#include "paddle/operators/type_alias.h" namespace paddle { namespace framework { @@ -70,21 +71,21 @@ class NoGradOpMaker : public OpProtoAndCheckerMaker { } }; -class FcOp : public NetOp { +class FcOp : public ops::NetOp { public: void Init() override { AddOp(OpRegistry::CreateOp("mul", {Input("X"), Input("W")}, {Output("mul_result")}, {})); auto b_name = Input("b"); std::string before_act = "mul_result"; - if (b_name != EMPTY_VAR_NAME()) { + if (b_name != kEmptyVarName) { AddOp(OpRegistry::CreateOp("rowwise_add", {Output("mul_result"), b_name}, {Output("add_result")}, {})); before_act = "add_result"; } else { auto out_varname = Output("add_result"); - if (out_varname != EMPTY_VAR_NAME()) { - this->Rename(out_varname, EMPTY_VAR_NAME()); + if (out_varname != kEmptyVarName) { + this->Rename(out_varname, kEmptyVarName); } } @@ -161,14 +162,13 @@ TEST(Backward, simple_op_grad) { auto fwd = f::OpRegistry::CreateOp("rowwise_add", {"X", "b"}, {"Out"}, {}); ASSERT_NE(fwd, nullptr); auto gop = f::OpRegistry::CreateGradOp(*fwd); - ASSERT_EQ(1UL, gop->inputs_.size()); - ASSERT_EQ("Out" + f::OperatorBase::GRAD_VAR_SUFFIX(), gop->inputs_[0]); + ASSERT_EQ(4UL, gop->inputs_.size()); + ASSERT_EQ(f::kEmptyVarName, gop->inputs_[0]); ASSERT_EQ("rowwise_add_grad", gop->type_); - ASSERT_EQ("X" + f::OperatorBase::GRAD_VAR_SUFFIX(), gop->outputs_[0]); - ASSERT_EQ("b" + f::OperatorBase::GRAD_VAR_SUFFIX(), gop->outputs_[1]); + ASSERT_EQ("X" + f::kGradVarSuffix, gop->outputs_[0]); + ASSERT_EQ("b" + f::kGradVarSuffix, gop->outputs_[1]); - ASSERT_EQ("X" + f::OperatorBase::GRAD_VAR_SUFFIX(), - gop->Output("X" + f::OperatorBase::GRAD_VAR_SUFFIX())); + ASSERT_EQ("X" + f::kGradVarSuffix, gop->Output("X" + f::kGradVarSuffix)); } TEST(Backward, simple_op_not_need_grad) { @@ -176,13 +176,14 @@ TEST(Backward, simple_op_not_need_grad) { ASSERT_NE(fwd, nullptr); auto gop = f::Backward(*fwd, {"X"}); ASSERT_EQ(std::find(gop->outputs_.begin(), gop->outputs_.end(), - "X" + f::OperatorBase::GRAD_VAR_SUFFIX()), + "X" + f::kGradVarSuffix), gop->outputs_.end()); auto no_input_gop = f::Backward(*fwd, {"X", "b"}); ASSERT_NE(no_input_gop, nullptr); ASSERT_TRUE(no_input_gop->IsNetOp()); - ASSERT_EQ(0UL, std::static_pointer_cast(no_input_gop)->ops_.size()); + ASSERT_EQ(0UL, + std::static_pointer_cast(no_input_gop)->ops_.size()); } TEST(Backward, net_fc_backward_normal) { @@ -191,7 +192,7 @@ TEST(Backward, net_fc_backward_normal) { ASSERT_NE(fwd, nullptr); std::shared_ptr gop = f::Backward(*fwd, {}); ASSERT_TRUE(gop->IsNetOp()); - auto net = static_cast(gop.get()); + auto net = static_cast(gop.get()); ASSERT_NO_THROW(net->DebugString()); @@ -208,13 +209,13 @@ TEST(Backward, net_fc_backward_normal) { } TEST(Backward, net_fc_backward_not_have_b) { - std::shared_ptr fwd = f::OpRegistry::CreateOp( - "fc", {"X", "w", f::OperatorBase::EMPTY_VAR_NAME()}, - {"mul_result", "add_result", "tmp"}, {}); + std::shared_ptr fwd = + f::OpRegistry::CreateOp("fc", {"X", "w", f::kEmptyVarName}, + {"mul_result", "add_result", "tmp"}, {}); ASSERT_NE(fwd, nullptr); std::shared_ptr gop = f::Backward(*fwd, {}); ASSERT_TRUE(gop->IsNetOp()); - auto net = static_cast(gop.get()); + auto net = static_cast(gop.get()); ASSERT_NO_THROW(net->DebugString()); @@ -228,7 +229,7 @@ TEST(Backward, net_fc_backward_not_have_b) { } TEST(Backward, net_input_of_network_not_need_grad) { - f::NetOp net; + ops::NetOp net; net.AddOp(f::OpRegistry::CreateOp("fc", {"X", "W1", "b1"}, {"mul_tmp_0", "add_tmp_0", "hidden0"}, {})); net.AddOp(f::OpRegistry::CreateOp("fc", {"hidden0", "W2", "b2"}, @@ -236,39 +237,36 @@ TEST(Backward, net_input_of_network_not_need_grad) { net.CompleteAddOp(); auto bwd = Backward(net, {"X"}); // X@GRAD is not need. ASSERT_TRUE(bwd->IsNetOp()); - auto bwd_net = static_cast(bwd.get()); + auto bwd_net = static_cast(bwd.get()); std::unordered_set all_output = std::unordered_set( bwd_net->outputs_.begin(), bwd_net->outputs_.end()); - all_output.erase(f::OperatorBase::EMPTY_VAR_NAME()); + all_output.erase(f::kEmptyVarName); for (auto &out : {"W1", "b1", "hidden0", "W2", "b2"}) { - ASSERT_NE(all_output.find(out + f::OperatorBase::GRAD_VAR_SUFFIX()), - all_output.end()); + ASSERT_NE(all_output.find(out + f::kGradVarSuffix), all_output.end()); } // Not Generated X - ASSERT_EQ(all_output.find("X" + f::OperatorBase::GRAD_VAR_SUFFIX()), - all_output.end()); + ASSERT_EQ(all_output.find("X" + f::kGradVarSuffix), all_output.end()); ASSERT_EQ(2UL, bwd_net->ops_.size()); ASSERT_TRUE(bwd_net->ops_[1]->IsNetOp()); - auto first_fc_grad = static_cast(bwd_net->ops_[1].get()); + auto first_fc_grad = static_cast(bwd_net->ops_[1].get()); ASSERT_EQ(3UL, first_fc_grad->ops_.size()); - ASSERT_EQ( - f::OperatorBase::EMPTY_VAR_NAME(), - first_fc_grad->ops_[2]->Output("A" + f::OperatorBase::GRAD_VAR_SUFFIX())); + ASSERT_EQ(f::kEmptyVarName, + first_fc_grad->ops_[2]->Output("A" + f::kGradVarSuffix)); } TEST(Backward, net_shared_weight) { - f::NetOp net; + ops::NetOp net; net.AddOp(f::OpRegistry::CreateOp("mul", {"X", "W"}, {"Out"}, {})); net.AddOp(f::OpRegistry::CreateOp("mul", {"Out", "W"}, {"FinalOut"}, {})); net.CompleteAddOp(); auto bwd = f::Backward(net, {}); ASSERT_TRUE(bwd->IsNetOp()); - auto bwd_net = static_cast(bwd.get()); + auto bwd_net = static_cast(bwd.get()); ASSERT_EQ(3UL, bwd_net->ops_.size()); ASSERT_EQ("add", bwd_net->ops_[2]->type_); } @@ -285,7 +283,7 @@ TEST(Backward, op_all_input_are_not_need) { auto fwd = f::OpRegistry::CreateOp("rowwise_add", {"X", "b"}, {"Out"}, {}); auto backward = f::Backward(*fwd, {"X", "b"}); ASSERT_TRUE(backward->IsNetOp()); - auto net = static_cast(backward.get()); + auto net = static_cast(backward.get()); ASSERT_TRUE(net->ops_.empty()); } @@ -293,7 +291,7 @@ TEST(Backward, op_all_output_are_not_need) { auto fwd = f::OpRegistry::CreateOp("rowwise_add", {"X", "b"}, {"Out"}, {}); auto backward = f::Backward(*fwd, {"Out"}); ASSERT_TRUE(backward->IsNetOp()); - auto net = static_cast(backward.get()); + auto net = static_cast(backward.get()); ASSERT_TRUE(net->ops_.empty()); } @@ -301,7 +299,7 @@ TEST(Backward, op_part_of_output_are_not_need) { auto fwd = f::OpRegistry::CreateOp("many_output_op", {"X"}, {"Y", "Z"}, {}); auto backward = f::Backward(*fwd, {"Z"}); ASSERT_TRUE(backward->IsNetOp()); - auto net = static_cast(backward.get()); + auto net = static_cast(backward.get()); ASSERT_EQ(net->ops_.size(), 2UL); auto &fill_zero = *net->ops_[0]; @@ -309,17 +307,15 @@ TEST(Backward, op_part_of_output_are_not_need) { ASSERT_EQ(1UL, fill_zero.inputs_.size()); ASSERT_EQ("Z", fill_zero.inputs_[0]); ASSERT_EQ(1UL, fill_zero.outputs_.size()); - ASSERT_EQ("Z" + f::OperatorBase::ZERO_VAR_SUFFIX(), fill_zero.outputs_[0]); + ASSERT_EQ("Z" + f::kZeroVarSuffix, fill_zero.outputs_[0]); auto &d_many_out = *net->ops_[1]; ASSERT_EQ("many_output_op_grad", d_many_out.type_); ASSERT_EQ(1UL + 2UL + 2UL, d_many_out.inputs_.size()); // I/O/OG - ASSERT_EQ("Z" + f::OperatorBase::ZERO_VAR_SUFFIX(), - d_many_out.Input("z" + f::OperatorBase::GRAD_VAR_SUFFIX())); - ASSERT_EQ("Y" + f::OperatorBase::GRAD_VAR_SUFFIX(), - d_many_out.Input("y" + f::OperatorBase::GRAD_VAR_SUFFIX())); - ASSERT_EQ("X" + f::OperatorBase::GRAD_VAR_SUFFIX(), - d_many_out.Output("x" + f::OperatorBase::GRAD_VAR_SUFFIX())); + ASSERT_EQ("Z" + f::kZeroVarSuffix, d_many_out.Input("z" + f::kGradVarSuffix)); + ASSERT_EQ("Y" + f::kGradVarSuffix, d_many_out.Input("y" + f::kGradVarSuffix)); + ASSERT_EQ("X" + f::kGradVarSuffix, + d_many_out.Output("x" + f::kGradVarSuffix)); } TEST(Backward, op_part_of_input_are_not_need) { @@ -329,19 +325,17 @@ TEST(Backward, op_part_of_input_are_not_need) { ASSERT_EQ(grad_mul.type_, "mul_grad"); ASSERT_EQ(grad_mul.inputs_.size(), 2UL + 1UL + 1UL); ASSERT_EQ(grad_mul.outputs_.size(), 2UL); - ASSERT_EQ(grad_mul.Output("A" + f::OperatorBase::GRAD_VAR_SUFFIX()), - f::OperatorBase::EMPTY_VAR_NAME()); - ASSERT_EQ(grad_mul.Output("B" + f::OperatorBase::GRAD_VAR_SUFFIX()), - "b" + f::OperatorBase::GRAD_VAR_SUFFIX()); - ASSERT_EQ(grad_mul.Input("Out" + f::OperatorBase::GRAD_VAR_SUFFIX()), - "out" + f::OperatorBase::GRAD_VAR_SUFFIX()); + ASSERT_EQ(grad_mul.Output("A" + f::kGradVarSuffix), f::kEmptyVarName); + ASSERT_EQ(grad_mul.Output("B" + f::kGradVarSuffix), "b" + f::kGradVarSuffix); + ASSERT_EQ(grad_mul.Input("Out" + f::kGradVarSuffix), + "out" + f::kGradVarSuffix); ASSERT_EQ(grad_mul.Input("A"), "a"); ASSERT_EQ(grad_mul.Input("B"), "b"); ASSERT_EQ(grad_mul.Input("Out"), "out"); } TEST(Backward, linear_net_intermediate_variable_has_no_grad) { - f::NetOp net; + ops::NetOp net; net.AddOp(f::OpRegistry::CreateOp("fc", {"x1", "w1", "b1"}, {"mul_out1", "add_out1", "out1"}, {})); net.AddOp(f::OpRegistry::CreateOp("fc", {"out1", "w2", "b2"}, @@ -351,14 +345,13 @@ TEST(Backward, linear_net_intermediate_variable_has_no_grad) { net.CompleteAddOp(); auto backward = f::Backward(net, {"mul_out2", "tmp_out2", "out2"}); ASSERT_TRUE(backward->IsNetOp()); - auto bwd_net = static_cast(backward.get()); + auto bwd_net = static_cast(backward.get()); ASSERT_EQ(bwd_net->ops_.size(), 3UL); auto &grad_fc = *bwd_net->ops_[0]; EXPECT_EQ(grad_fc.inputs_.size(), 3UL /* external input number */ + 1UL /* external output number*/ + 1UL /* number of gradient of external output*/ - - 1UL /*ignoreGradient varable number*/ + 2U /* internal variable number*/); EXPECT_EQ(grad_fc.outputs_.size(), 2UL /* input number of mul*/ + 2UL /* input number of rowwise_add */ @@ -367,23 +360,4 @@ TEST(Backward, linear_net_intermediate_variable_has_no_grad) { EXPECT_EQ(bwd_net->ops_[1]->outputs_.size(), 0UL); EXPECT_EQ(bwd_net->ops_[2]->inputs_.size(), 0UL); EXPECT_EQ(bwd_net->ops_[2]->outputs_.size(), 0UL); - - /* - EXPECT_EQ(grad_fc.Output("X" + f::OperatorBase::GRAD_VAR_SUFFIX()), - f::OperatorBase::EMPTY_VAR_NAME()); - EXPECT_EQ(grad_fc.Output("W" + f::OperatorBase::GRAD_VAR_SUFFIX()), - "w3" + f::OperatorBase::GRAD_VAR_SUFFIX()); - EXPECT_EQ(grad_fc.Output("b" + f::OperatorBase::GRAD_VAR_SUFFIX()), - "b3" + f::OperatorBase::GRAD_VAR_SUFFIX()); - EXPECT_EQ(grad_fc.Output("mul_result" + f::OperatorBase::GRAD_VAR_SUFFIX()), - "mul_out3" + f::OperatorBase::GRAD_VAR_SUFFIX()); - - EXPECT_EQ(grad_fc.Input("Out" + f::OperatorBase::GRAD_VAR_SUFFIX()), - "out3" + f::OperatorBase::GRAD_VAR_SUFFIX()); - EXPECT_EQ(grad_fc.Input("X"), "out2"); - EXPECT_EQ(grad_fc.Input("W"), "w3"); - EXPECT_EQ(grad_fc.Input("mul_result"), "mul_out3"); - EXPECT_EQ(grad_fc.Input("add_result"), "tmp_out3"); - EXPECT_EQ(grad_fc.Input("Out"), "out3"); - */ } diff --git a/paddle/framework/ddim.h b/paddle/framework/ddim.h index 9fcc657edcd5459d0a42a64d708603a4bcd53cf0..5aa5af0c19be5a209c760282cb1a090fc57a53ad 100644 --- a/paddle/framework/ddim.h +++ b/paddle/framework/ddim.h @@ -25,18 +25,15 @@ limitations under the License. */ namespace paddle { namespace framework { -namespace { -typedef boost::variant, Dim<2>, Dim<3>, Dim<4>, Dim<5>, Dim<6>, Dim<7>, - Dim<8>, Dim<9>> - DDimVar; -} - /** * \brief A dynamically sized dimension. * * The number of dimensions must be between [1, 9]. */ struct DDim { + typedef boost::variant, Dim<2>, Dim<3>, Dim<4>, Dim<5>, Dim<6>, Dim<7>, + Dim<8>, Dim<9>> + DDimVar; DDimVar var; DDim() : var(Dim<1>()) {} diff --git a/paddle/framework/grad_op_builder.cc b/paddle/framework/grad_op_builder.cc index dd686cc78246f06cdc3ec7d013086863d7e8fac0..6d032fb78f099f5142d64e531d1a03c10ed5e68e 100644 --- a/paddle/framework/grad_op_builder.cc +++ b/paddle/framework/grad_op_builder.cc @@ -8,107 +8,95 @@ You may obtain a copy of the License at Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ +WITHOpArgType::OUT WARRANTIES OR CONDITIONS OF ANY KOpArgType::IND, either +express or implied. See the License for the specific language governing +permissions and limitations under the License. */ #include "paddle/framework/grad_op_builder.h" +#include "paddle/framework/op_proto.pb.h" #include "paddle/framework/op_registry.h" namespace paddle { namespace framework { -OperatorBase* GradOpBuilder::Build() { - BuildOpInOutArgList(); - std::string grad_op_type = OpRegistry::grad_ops().at(op_.type_); - OperatorBase* grad_op = OpRegistry::op_creators().at(grad_op_type)(); - grad_op->type_ = grad_op_type; - CompleteGradOp(grad_op); - return grad_op; -} +class OpRegistry; + +using VarIndexMap = std::unordered_map; -OpInOutArg* GradOpBuilder::BuildArg(const VarProto& var, - const VarIndexMap& var_map, - const std::vector& format, - InOutType type) { - int idx = var_map.at(var.name()); - int begin_idx = format.empty() ? idx : format.at(idx); - int end_idx = format.empty() ? idx + 1 : format.at(idx + 1); - return new OpInOutArg(var.name(), type, !var.ignore_gradient(), begin_idx, - end_idx); +enum class OpArgType { IN, OUT }; + +static std::vector* GetOpFormat(OperatorBase* op, const OpArgType& type) { + std::string key = type == OpArgType::IN ? "input_format" : "output_format"; + return op->attrs_.count(key) + ? &boost::get>(op->attrs_.at(key)) + : nullptr; } -void GradOpBuilder::BuildOpInOutArgList() { - const OpProto& op_proto = OpRegistry::protos().at(op_.type_); - const auto& var_map = *(OpRegistry::VarIndexMaps().at(op_.type_)); - const std::vector& in_format = - op_.attrs_.count("input_format") - ? op_.GetAttr>("input_format") - : std::vector(); - const std::vector& out_format = - op_.attrs_.count("output_format") - ? op_.GetAttr>("output_format") - : std::vector(); - for (const auto& var : op_proto.inputs()) { - arg_list_.emplace_back( - std::shared_ptr(BuildArg(var, var_map, in_format, IN))); - } - for (const auto& var : op_proto.outputs()) { - arg_list_.emplace_back( - std::shared_ptr(BuildArg(var, var_map, out_format, OUT))); - } +static const std::vector* GetOpFormat(const OperatorBase* op, + const OpArgType& type) { + std::string key = type == OpArgType::IN ? "input_format" : "output_format"; + return op->attrs_.count(key) + ? &boost::get>(op->attrs_.at(key)) + : nullptr; } -void GradOpBuilder::AddArgIntoGradOp(const OpInOutArg* arg, - std::vector& in_out, - std::vector& format, - VarIndexMap* varmap, int& idx, - bool is_grad) const { - std::string var_name = arg->proto_name_; - if (is_grad) { - var_name += OperatorBase::GRAD_VAR_SUFFIX(); - } - (*varmap)[var_name] = idx++; - size_t pre_sz = in_out.size(); - auto base_it = arg->type_ == IN ? op_.inputs_.begin() : op_.outputs_.begin(); - std::copy(base_it + arg->begin_idx_, base_it + arg->end_idx_, - std::back_inserter(in_out)); - if (is_grad) { - for (size_t i = pre_sz; i < in_out.size(); ++i) { - in_out[i] += OperatorBase::GRAD_VAR_SUFFIX(); +static void TransOpArg(const OperatorBase* src_op, OperatorBase* dst_op, + const OpArgType& src_type, const OpArgType& dst_type, + int& idx, bool is_grad) { + const std::vector& src_inout = + src_type == OpArgType::IN ? src_op->inputs_ : src_op->outputs_; + const std::vector* src_format = GetOpFormat(src_op, src_type); + + std::vector& dst_inout = + dst_type == OpArgType::IN ? dst_op->inputs_ : dst_op->outputs_; + std::vector* dst_format = GetOpFormat(dst_op, dst_type); + const OpProto& proto = OpRegistry::protos().at(src_op->type_); + const auto& src_arg_list = + src_type == OpArgType::IN ? proto.inputs() : proto.outputs(); + + for (const auto& arg : src_arg_list) { + std::string src_name = arg.name(); + std::string dst_name = is_grad ? src_name + kGradVarSuffix : src_name; + (*dst_op->in_out_idxs_)[dst_name] = idx++; + int src_arg_idx = src_op->in_out_idxs_->at(src_name); + int src_begin = + src_format == nullptr ? src_arg_idx : src_format->at(src_arg_idx); + int src_end = src_format == nullptr ? src_arg_idx + 1 + : src_format->at(src_arg_idx + 1); + for (int i = src_begin; i < src_end; ++i) { + std::string s = + is_grad ? src_inout[i] + kGradVarSuffix + : (arg.ignore_gradient() ? kEmptyVarName : src_inout[i]); + dst_inout.emplace_back(s); + } + if (dst_format != nullptr) { + dst_format->push_back(dst_inout.size()); } } - format.push_back(in_out.size()); } -void GradOpBuilder::CompleteGradOp(OperatorBase* grad_op) const { - grad_op->attrs_ = op_.attrs_; +OperatorBase* BuildGradOp(const OperatorBase* op) { + std::string grad_op_type = OpRegistry::grad_ops().at(op->type_); + OperatorBase* grad_op = OpRegistry::op_creators().at(grad_op_type)(); + grad_op->type_ = grad_op_type; + grad_op->attrs_ = op->attrs_; grad_op->attrs_.erase("input_format"); grad_op->attrs_.erase("output_format"); - VarIndexMap* grad_varmap = new VarIndexMap(); + if (GetOpFormat(op, OpArgType::IN) != nullptr) { + grad_op->attrs_["output_format"] = std::vector({0}); + } + if (GetOpFormat(op, OpArgType::IN) != nullptr || + GetOpFormat(op, OpArgType::OUT) != nullptr) { + grad_op->attrs_["input_format"] = std::vector({0}); + } + grad_op->in_out_idxs_.reset(new VarIndexMap()); int in_idx = 0; int out_idx = 0; - std::vector in_format({0}); - std::vector out_format({0}); - for (const auto& arg : arg_list_) { - // op_'s inputs_ and outputs_ - if (arg->needed_in_grad_) { - AddArgIntoGradOp(arg.get(), grad_op->inputs_, in_format, grad_varmap, - in_idx, false); - } - if (arg->type_ == IN) { - // gradients of op_'s inputs_ - AddArgIntoGradOp(arg.get(), grad_op->outputs_, out_format, grad_varmap, - out_idx, true); - } else { - // gradients of op_'s outputs_ - AddArgIntoGradOp(arg.get(), grad_op->inputs_, in_format, grad_varmap, - in_idx, true); - } - } - grad_op->attrs_["input_format"] = in_format; - grad_op->attrs_["output_format"] = out_format; - grad_op->in_out_idxs_.reset(grad_varmap); + TransOpArg(op, grad_op, OpArgType::IN, OpArgType::IN, in_idx, false); // I + TransOpArg(op, grad_op, OpArgType::OUT, OpArgType::IN, in_idx, false); // G + TransOpArg(op, grad_op, OpArgType::OUT, OpArgType::IN, in_idx, true); // OG + TransOpArg(op, grad_op, OpArgType::IN, OpArgType::OUT, out_idx, true); // IG + return grad_op; } } // namespace framework diff --git a/paddle/framework/grad_op_builder.h b/paddle/framework/grad_op_builder.h index cc7a76f3726e00a08fbe06bca4c9b9f5bad466b4..998f8ebbb5f2f4fb8b7e938b5916afd0f8a7930d 100644 --- a/paddle/framework/grad_op_builder.h +++ b/paddle/framework/grad_op_builder.h @@ -1,48 +1,25 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + #pragma once -#include "paddle/framework/op_proto.pb.h" #include "paddle/framework/operator.h" namespace paddle { namespace framework { -class OpRegistry; - -enum InOutType { IN, OUT }; - -struct OpInOutArg { - OpInOutArg(const std::string& proto_name, const InOutType& type, - bool needed_in_grad, size_t begin_idx, size_t end_idx) - : proto_name_(proto_name), - type_(type), - needed_in_grad_(needed_in_grad), - begin_idx_(begin_idx), - end_idx_(end_idx) {} - - std::string proto_name_; - InOutType type_; - bool needed_in_grad_; - size_t begin_idx_; - size_t end_idx_; -}; - -class GradOpBuilder { - using VarIndexMap = std::unordered_map; - - public: - GradOpBuilder(const OperatorBase& op) : op_(op) {} - OperatorBase* Build(); - - private: - OpInOutArg* BuildArg(const VarProto& var, const VarIndexMap& var_map, - const std::vector& format, InOutType type); - void BuildOpInOutArgList(); - void AddArgIntoGradOp(const OpInOutArg* arg, std::vector& in_out, - std::vector& format, VarIndexMap* varmap, int& idx, - bool is_grad) const; - void CompleteGradOp(OperatorBase* grad_op) const; - const OperatorBase& op_; - std::vector> arg_list_; -}; + +OperatorBase* BuildGradOp(const OperatorBase* op); } // namespace framework } // namespace paddle diff --git a/paddle/framework/grad_op_builder_test.cc b/paddle/framework/grad_op_builder_test.cc index e9cf3b9798db2cbfb8d26259ae9a6741fbae8278..cf7143eba4460e5619188b82ffe23db11a04a236 100644 --- a/paddle/framework/grad_op_builder_test.cc +++ b/paddle/framework/grad_op_builder_test.cc @@ -8,10 +8,49 @@ USE_OP(add_two); namespace paddle { namespace framework { +class NOP : public OperatorBase { + public: + void InferShape(const Scope &scope) const override {} + void Run(const Scope &scope, + const platform::DeviceContext &dev_ctx) const override {} +}; + +class MutiInOutOpMaker : public OpProtoAndCheckerMaker { + public: + MutiInOutOpMaker(OpProto *proto, OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("In1", "a single input"); + AddInput("In2_mult", "a multiple input").SetMultiple(); + AddInput("In3", "another single input"); + AddOutput("Out1", "a single output"); + AddOutput("Out2_mult", "a multiple output").SetMultiple(); + AddComment("test op with multiple inputs and outputs"); + } +}; + +class IOIgnoredOpMaker : public OpProtoAndCheckerMaker { + public: + IOIgnoredOpMaker(OpProto *proto, OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("In1", "a single input"); + AddInput("In2_mult", "a multiple input").SetMultiple().IgnoreGradient(); + AddInput("In3_mult", "another multiple input").SetMultiple(); + AddOutput("Out1_mult", "a multiple output").SetMultiple(); + AddOutput("Out2", "a single output").IgnoreGradient(); + AddComment("op with inputs and outputs ignored in gradient calculating"); + } +}; + +} // namespace framework +} // namespace paddle + +namespace f = paddle::framework; + TEST(GradOpBuilder, AddTwo) { - std::shared_ptr add_op( - OpRegistry::CreateOp("add_two", {"x", "y"}, {"out"}, {})); - std::shared_ptr grad_add_op = OpRegistry::CreateGradOp(*add_op); + std::shared_ptr add_op( + f::OpRegistry::CreateOp("add_two", {"x", "y"}, {"out"}, {})); + std::shared_ptr grad_add_op = + f::OpRegistry::CreateGradOp(*add_op); EXPECT_EQ(static_cast(grad_add_op->inputs_.size()), 4); EXPECT_EQ(static_cast(grad_add_op->outputs_.size()), 2); EXPECT_EQ(grad_add_op->Input("X"), "x"); @@ -22,5 +61,77 @@ TEST(GradOpBuilder, AddTwo) { EXPECT_EQ(grad_add_op->Output("Y@GRAD"), "y@GRAD"); } -} // namespace framework -} // namespace paddle \ No newline at end of file +REGISTER_OP(mult_io, f::NOP, f::MutiInOutOpMaker); +REGISTER_GRADIENT_OP(mult_io, mult_io_grad, f::NOP); +REGISTER_OP(io_ignored, f::NOP, f::IOIgnoredOpMaker); +REGISTER_GRADIENT_OP(io_ignored, io_ignored_grad, f::NOP); + +TEST(GradOpBuilder, MutiInOut) { + f::AttributeMap attrs{{"input_format", std::vector{0, 1, 4, 5}}, + {"output_format", std::vector{0, 1, 3}}}; + std::shared_ptr test_op(f::OpRegistry::CreateOp( + "mult_io", {"in1", "in2_1", "in2_2", "in2_3", "in3"}, + {"out1", "out2_1", "out2_2"}, attrs)); + std::shared_ptr grad_test_op = + f::OpRegistry::CreateGradOp(*test_op); + + ASSERT_EQ(grad_test_op->inputs_.size(), 5UL + 3UL + 3UL); + EXPECT_EQ(grad_test_op->Input("In1"), "in1"); + EXPECT_EQ(grad_test_op->Inputs("In2_mult"), + std::vector({"in2_1", "in2_2", "in2_3"})); + EXPECT_EQ(grad_test_op->Input("In3"), "in3"); + EXPECT_EQ(grad_test_op->Input("Out1"), "out1"); + EXPECT_EQ(grad_test_op->Inputs("Out2_mult"), + std::vector({"out2_1", "out2_2"})); + EXPECT_EQ(grad_test_op->Input("Out1" + f::kGradVarSuffix), + "out1" + f::kGradVarSuffix); + EXPECT_EQ(grad_test_op->Inputs("Out2_mult" + f::kGradVarSuffix), + std::vector( + {"out2_1" + f::kGradVarSuffix, "out2_2" + f::kGradVarSuffix})); + + ASSERT_EQ(grad_test_op->outputs_.size(), 5UL); + EXPECT_EQ(grad_test_op->Output("In1" + f::kGradVarSuffix), + "in1" + f::kGradVarSuffix); + EXPECT_EQ(grad_test_op->Outputs("In2_mult" + f::kGradVarSuffix), + std::vector({"in2_1" + f::kGradVarSuffix, + "in2_2" + f::kGradVarSuffix, + "in2_3" + f::kGradVarSuffix})); + EXPECT_EQ(grad_test_op->Output("In3" + f::kGradVarSuffix), + "in3" + f::kGradVarSuffix); +} + +TEST(GradOpBuilder, IOIgnoredInGradient) { + f::AttributeMap attrs{{"input_format", std::vector{0, 1, 3, 5}}, + {"output_format", std::vector{0, 2, 3}}}; + std::shared_ptr test_op(f::OpRegistry::CreateOp( + "io_ignored", {"in1", "in2_1", "in2_2", "in3_1", "in3_2"}, + {"out1_1", "out1_2", "out2"}, attrs)); + std::shared_ptr grad_test_op = + f::OpRegistry::CreateGradOp(*test_op); + + // 'In2' and 'Out2' are ignored in gradient calculating + ASSERT_EQ(grad_test_op->inputs_.size(), 5UL + 3UL + 3UL); + EXPECT_EQ(grad_test_op->Input("In1"), "in1"); + EXPECT_EQ(grad_test_op->Inputs("In2_mult"), + std::vector({f::kEmptyVarName, f::kEmptyVarName})); + EXPECT_EQ(grad_test_op->Inputs("In3_mult"), + std::vector({"in3_1", "in3_2"})); + EXPECT_EQ(grad_test_op->Inputs("Out1_mult"), + std::vector({"out1_1", "out1_2"})); + EXPECT_EQ(grad_test_op->Input("Out2"), f::kEmptyVarName); + EXPECT_EQ(grad_test_op->Inputs("Out1_mult" + f::kGradVarSuffix), + std::vector( + {"out1_1" + f::kGradVarSuffix, "out1_2" + f::kGradVarSuffix})); + EXPECT_EQ(grad_test_op->Input("Out2" + f::kGradVarSuffix), + "out2" + f::kGradVarSuffix); + + ASSERT_EQ(grad_test_op->outputs_.size(), 5UL); + EXPECT_EQ(grad_test_op->Output("In1" + f::kGradVarSuffix), + "in1" + f::kGradVarSuffix); + EXPECT_EQ(grad_test_op->Outputs("In2_mult" + f::kGradVarSuffix), + std::vector( + {"in2_1" + f::kGradVarSuffix, "in2_2" + f::kGradVarSuffix})); + EXPECT_EQ(grad_test_op->Outputs("In3_mult" + f::kGradVarSuffix), + std::vector( + {"in3_1" + f::kGradVarSuffix, "in3_2" + f::kGradVarSuffix})); +} diff --git a/paddle/framework/op_desc.proto b/paddle/framework/op_desc.proto index 89497f3c16bc28aa93b25a83c1f2eccafdf1c5b4..5954dd89155ec5d5e99a33f4688b705780a6582d 100644 --- a/paddle/framework/op_desc.proto +++ b/paddle/framework/op_desc.proto @@ -15,7 +15,7 @@ limitations under the License. */ syntax="proto2"; package paddle.framework; -import "attr_type.proto"; +import "attribute.proto"; // AttrDesc is used to describe Attributes of an Operator. It contain's // name, type, and value of Attribute. diff --git a/paddle/framework/op_proto.proto b/paddle/framework/op_proto.proto index 366c84e53dc29e41eefbaef0a6452e01c4fe37bd..60661cf7a8c7721b894fe648a7b8ca0f8279cf23 100644 --- a/paddle/framework/op_proto.proto +++ b/paddle/framework/op_proto.proto @@ -21,7 +21,7 @@ limitations under the License. */ syntax="proto2"; package paddle.framework; -import "attr_type.proto"; +import "attribute.proto"; // Attribute protocol message for 3rd-party language binding. // It will store the Op support what attribute and what type. diff --git a/paddle/framework/op_registry.cc b/paddle/framework/op_registry.cc index 1d14535c50b542733663a6900a8b5f2033290ea6..1caa02a2a1d046778f875d04eeaef957be741302 100644 --- a/paddle/framework/op_registry.cc +++ b/paddle/framework/op_registry.cc @@ -14,37 +14,8 @@ limitations under the License. */ #include -namespace paddle { -namespace framework { - -template <> -void AttrTypeHelper::SetAttrType(AttrProto* attr) { - attr->set_type(paddle::framework::AttrType::INT); -} - -template <> -void AttrTypeHelper::SetAttrType(AttrProto* attr) { - attr->set_type(paddle::framework::AttrType::FLOAT); -} - -template <> -void AttrTypeHelper::SetAttrType(AttrProto* attr) { - attr->set_type(paddle::framework::AttrType::STRING); -} +#include -template <> -void AttrTypeHelper::SetAttrType>(AttrProto* attr) { - attr->set_type(paddle::framework::AttrType::INTS); -} - -template <> -void AttrTypeHelper::SetAttrType>(AttrProto* attr) { - attr->set_type(paddle::framework::AttrType::FLOATS); -} - -template <> -void AttrTypeHelper::SetAttrType>(AttrProto* attr) { - attr->set_type(paddle::framework::AttrType::STRINGS); -} -} // namespace framework +namespace paddle { +namespace framework {} // namespace framework } // namespace paddle diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 3e72e391266066de9e4114e68b43b066c15254db..6c26183818a9d6996e3d3ce2af74ba36f4711eca 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -19,7 +19,7 @@ limitations under the License. */ #include #include #include -#include "paddle/framework/attr_checker.h" +#include "paddle/framework/attribute.h" #include "paddle/framework/grad_op_builder.h" #include "paddle/framework/op_desc.pb.h" #include "paddle/framework/scope.h" @@ -27,49 +27,6 @@ limitations under the License. */ namespace paddle { namespace framework { -// helper class to set attribute type -struct AttrTypeHelper { - template - static void SetAttrType(AttrProto* attr); - - static Attribute GetAttrValue(const AttrDesc& attr_desc) { - switch (attr_desc.type()) { - case paddle::framework::AttrType::INT: { - return attr_desc.i(); - } - case paddle::framework::AttrType::FLOAT: { - return attr_desc.f(); - } - case paddle::framework::AttrType::STRING: { - return attr_desc.s(); - } - case paddle::framework::AttrType::INTS: { - std::vector val(attr_desc.ints_size()); - for (int i = 0; i < attr_desc.ints_size(); ++i) { - val[i] = attr_desc.ints(i); - } - return val; - } - case paddle::framework::AttrType::FLOATS: { - std::vector val(attr_desc.floats_size()); - for (int i = 0; i < attr_desc.floats_size(); ++i) { - val[i] = attr_desc.floats(i); - } - return val; - } - case paddle::framework::AttrType::STRINGS: { - std::vector val(attr_desc.strings_size()); - for (int i = 0; i < attr_desc.strings_size(); ++i) { - val[i] = attr_desc.strings(i); - } - return val; - } - } - PADDLE_ENFORCE(false, "Unknown OpDesc::AttrDesc::type !"); - return boost::blank(); - } -}; - // this class not only make proto but also init attribute checkers. class OpProtoAndCheckerMaker { public: @@ -136,7 +93,7 @@ class OpProtoAndCheckerMaker { *attr->mutable_name() = name; *attr->mutable_comment() = comment; attr->set_generated(generated); - AttrTypeHelper::SetAttrType(attr); + attr->set_type(AttrTypeID()); return op_checker_->AddAttrChecker(name); } @@ -297,7 +254,7 @@ class OpRegistry { AttributeMap attrs; for (auto& attr : op_desc.attrs()) { - attrs[attr.name()] = AttrTypeHelper::GetAttrValue(attr); + attrs[attr.name()] = GetAttrValue(attr); } return CreateOp(op_desc.type(), inputs, outputs, attrs); @@ -306,8 +263,7 @@ class OpRegistry { static std::shared_ptr CreateGradOp(const OperatorBase& op) { PADDLE_ENFORCE(!op.IsNetOp(), "Use framework::Backward to get backward ops"); - GradOpBuilder builder(op); - std::shared_ptr grad_op(builder.Build()); + std::shared_ptr grad_op(BuildGradOp(&op)); grad_op->Init(); return grad_op; } @@ -315,7 +271,7 @@ class OpRegistry { static std::unordered_map& protos() { static std::unordered_map protos_; return protos_; - }; + } static std::unordered_map& grad_ops() { static std::unordered_map grad_ops_; @@ -337,12 +293,12 @@ class OpRegistry { static std::unordered_map& op_checkers() { static std::unordered_map op_checkers_; return op_checkers_; - }; + } static void GenerateTempVariableName(OperatorBase* op) { static std::atomic gUniqId(0UL); for (auto& outname : op->outputs_) { - if (outname == OperatorBase::TMP_VAR_NAME()) { + if (outname == kTempVarName) { outname += op->type_; outname += "@"; outname += std::to_string(gUniqId.fetch_add(1)); @@ -354,7 +310,7 @@ class OpRegistry { template class OpRegisterHelper { public: - OpRegisterHelper(const char* op_type) { + explicit OpRegisterHelper(const char* op_type) { OpRegistry::RegisterOp(op_type); } }; diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 55435103489ace11868eed61c38018d8ba357e65..d42e21c0a235791db42076555d0568ff8f4acbe2 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -20,7 +20,7 @@ limitations under the License. */ #include #include -#include "paddle/framework/attr_checker.h" +#include "paddle/framework/attribute.h" #include "paddle/framework/op_desc.pb.h" #include "paddle/framework/op_proto.pb.h" #include "paddle/framework/scope.h" @@ -32,9 +32,29 @@ limitations under the License. */ namespace paddle { namespace framework { +/// If a variable is a empty variable, that name will be used. +const std::string kEmptyVarName = "@EMPTY@"; + +/// If a variable is a temporary variable, that name will be set in Python, +/// but it will be convert to a unique name in scope after OpCreator. +const std::string kTempVarName = "@TEMP@"; + +/// If a variable's name has a certain suffix, it means that the +/// variable is the gradient of another varibale. +/// e.g. Variable "x@GRAD" is the gradient of varibale "x". +const std::string kGradVarSuffix = "@GRAD"; + +/// Variables with this suffix are supposed to be filled up with zeros. +const std::string kZeroVarSuffix = "@ZERO"; + +inline std::string GradVarName(const std::string& var_name) { + return var_name + kGradVarSuffix; +} + class OperatorBase; class InferShapeContext; class ExecutionContext; + /** * OperatorBase has the basic element that Net will call to do computation. * Only CreateOperator from OpRegistry will new Operator directly. User @@ -43,21 +63,6 @@ class ExecutionContext; */ class OperatorBase { public: - /// If a variable is a empty variable, that name will be used. - static std::string EMPTY_VAR_NAME() { return "@EMPTY@"; } - - /// If a variable is a temporary variable, that name will be set in Python, - /// but it will be convert to a unique name in scope after OpCreator. - static std::string TMP_VAR_NAME() { return "@TEMP@"; } - - /// If a variable's name has a certain suffix, it means that the - /// variable is the gradient of another varibale. - /// e.g. Variable "x@GRAD" is the gradient of varibale "x". - static std::string GRAD_VAR_SUFFIX() { return "@GRAD"; } - - /// Variables with this suffix are supposed to be filled up with zeros. - static std::string ZERO_VAR_SUFFIX() { return "@ZERO"; } - virtual ~OperatorBase() {} template @@ -280,7 +285,7 @@ class OperatorWithKernel : public OperatorBase { platform::Place place_; OpKernelKey() = default; - OpKernelKey(const platform::DeviceContext& dev_ctx) { + explicit OpKernelKey(const platform::DeviceContext& dev_ctx) { place_ = dev_ctx.GetPlace(); } diff --git a/paddle/pybind/pybind.cc b/paddle/framework/pybind.cc similarity index 59% rename from paddle/pybind/pybind.cc rename to paddle/framework/pybind.cc index 40ff164497f627c0b562b6d33bfb4bec590e4c85..cbb86c4195a6c7e976fc5e0dd69d77be46dfb17c 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/framework/pybind.cc @@ -4,7 +4,7 @@ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -17,19 +17,19 @@ limitations under the License. */ #include #include "paddle/framework/backward.h" -#include "paddle/framework/net.h" #include "paddle/framework/op_registry.h" #include "paddle/framework/operator.h" #include "paddle/framework/scope.h" +#include "paddle/framework/tensor_py.h" +#include "paddle/operators/net_op.h" +#include "paddle/operators/type_alias.h" #include "paddle/platform/enforce.h" #include "paddle/platform/place.h" -#include "paddle/pybind/tensor_bind.h" #include "pybind11/numpy.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" namespace py = pybind11; -namespace pd = paddle::framework; USE_OP(add_two); USE_OP(onehot_cross_entropy); @@ -41,17 +41,18 @@ USE_OP(sigmoid); USE_OP(softmax); USE_OP(rowwise_add); USE_OP_WITHOUT_KERNEL(recurrent_op); - +namespace paddle { +namespace framework { template -void ExposeOperator(ClassType& m) { +void ExposeOperator(ClassType &m) { m.def("infer_shape", &ClassType::type::InferShape) .def("run", &ClassType::type::Run) .def("type", - [](const typename ClassType::type& op) -> std::string { + [](const typename ClassType::type &op) -> std::string { return op.type_; }) .def("outputs", - [](const typename ClassType::type& op) -> std::vector { + [](const typename ClassType::type &op) -> std::vector { return op.outputs_; }) .def("__str__", &ClassType::type::DebugString); @@ -73,80 +74,81 @@ bool IsCompileGPU() { PYBIND11_PLUGIN(core) { py::module m("core", "C++ core of PaddlePaddle"); - py::class_(m, "Tensor", py::buffer_protocol()) - .def_buffer([](pd::Tensor& self) -> py::buffer_info { - return paddle::pybind::CastToPyBuffer(self); - }) + py::class_(m, "Tensor", py::buffer_protocol()) + .def_buffer( + [](Tensor &self) -> py::buffer_info { return CastToPyBuffer(self); }) .def("get_dims", - [](const pd::Tensor& self) { return pd::vectorize(self.dims()); }) + [](const Tensor &self) { return vectorize(self.dims()); }) .def("set_dims", - [](pd::Tensor& self, const std::vector& dim) { - self.Resize(pd::make_ddim(dim)); + [](Tensor &self, const std::vector &dim) { + self.Resize(make_ddim(dim)); }) .def("alloc_float", - [](pd::Tensor& self, paddle::platform::GPUPlace& place) { + [](Tensor &self, paddle::platform::GPUPlace &place) { self.mutable_data(place); }) .def("alloc_float", - [](pd::Tensor& self, paddle::platform::CPUPlace& place) { + [](Tensor &self, paddle::platform::CPUPlace &place) { self.mutable_data(place); }) .def("alloc_int", - [](pd::Tensor& self, paddle::platform::CPUPlace& place) { + [](Tensor &self, paddle::platform::CPUPlace &place) { self.mutable_data(place); }) .def("alloc_int", - [](pd::Tensor& self, paddle::platform::GPUPlace& place) { + [](Tensor &self, paddle::platform::GPUPlace &place) { self.mutable_data(place); }) - .def("set", paddle::pybind::PyCPUTensorSetFromArray) - .def("set", paddle::pybind::PyCPUTensorSetFromArray) + .def("set", PyCPUTensorSetFromArray) + .def("set", PyCPUTensorSetFromArray) #ifndef PADDLE_ONLY_CPU - .def("set", paddle::pybind::PyCUDATensorSetFromArray) - .def("set", paddle::pybind::PyCUDATensorSetFromArray) + .def("set", PyCUDATensorSetFromArray) + .def("set", PyCUDATensorSetFromArray) #endif - .def("shape", - [](pd::Tensor& self) { return pd::vectorize(self.dims()); }); + .def("shape", [](Tensor &self) { return vectorize(self.dims()); }) + .def("set_float_element", + [](Tensor &self, size_t offset, float f) { + // TODO(yuyang18): Only support GPU now. + self.data()[offset] = f; + }) + .def("get_float_element", [](Tensor &self, size_t offset) -> float { + // TODO(yuyang18): Only support GPU now. + return self.data()[offset]; + }); - py::class_(m, "Variable", R"DOC(Variable Class. + py::class_(m, "Variable", R"DOC(Variable Class. All parameter, weight, gradient are variables in Paddle. )DOC") - .def("is_int", [](const pd::Variable& var) { return var.IsType(); }) + .def("is_int", [](const Variable &var) { return var.IsType(); }) .def("set_int", - [](pd::Variable& var, int val) -> void { - *var.GetMutable() = val; - }) - .def("get_int", - [](const pd::Variable& var) -> int { return var.Get(); }) + [](Variable &var, int val) -> void { *var.GetMutable() = val; }) + .def("get_int", [](const Variable &var) -> int { return var.Get(); }) .def("get_tensor", - [](pd::Variable& self) -> pd::Tensor* { - return self.GetMutable(); - }, + [](Variable &self) -> Tensor * { return self.GetMutable(); }, py::return_value_policy::reference) .def("get_net", - [](pd::Variable& self) -> pd::NetOp* { - return self.GetMutable(); + [](Variable &self) -> ops::NetOp * { + return self.GetMutable(); }, py::return_value_policy::reference); - py::class_(m, "Scope", "") + py::class_(m, "Scope", "") .def("new_var", - [](pd::Scope& self, const std::string& name) -> pd::Variable* { + [](Scope &self, const std::string &name) -> Variable * { return self.NewVar(name); }, py::return_value_policy::reference) - .def("find_var", &pd::Scope::FindVar, py::return_value_policy::reference) + .def("find_var", &Scope::FindVar, py::return_value_policy::reference) .def(py::init<>()) - .def("new_scope", - [](pd::Scope& self) -> pd::Scope* { return &self.NewScope(); }, + .def("new_scope", [](Scope &self) -> Scope * { return &self.NewScope(); }, py::return_value_policy::reference) - .def("drop_kids", &pd::Scope::DropKids); + .def("drop_kids", &Scope::DropKids); //! @note: Be careful! PyBind will return std::string as an unicode, not //! Python str. If you want a str object, you should cast them in Python. m.def("get_all_op_protos", []() -> std::vector { - auto& protos = pd::OpRegistry::protos(); + auto &protos = OpRegistry::protos(); std::vector ret_values; for (auto it = protos.begin(); it != protos.end(); ++it) { PADDLE_ENFORCE(it->second.IsInitialized(), @@ -161,8 +163,8 @@ All parameter, weight, gradient are variables in Paddle. m.def_submodule( "var_names", "The module will return special predefined variable name in Paddle") - .def("empty", pd::OperatorBase::EMPTY_VAR_NAME) - .def("temp", pd::OperatorBase::TMP_VAR_NAME); + .def("empty", []() { return kEmptyVarName; }) + .def("temp", []() { return kTempVarName; }); // clang-format off py::class_(m, "DeviceContext") .def_static("create", @@ -185,43 +187,45 @@ All parameter, weight, gradient are variables in Paddle. py::class_(m, "CPUPlace").def(py::init<>()); - py::class_> operator_base( + py::class_> operator_base( m, "Operator"); operator_base.def_static("create", [](py::bytes protobin) { - pd::OpDesc desc; + OpDesc desc; PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), "Cannot parse user input to OpDesc"); PADDLE_ENFORCE(desc.IsInitialized(), "User OpDesc is not initialized, reason %s", desc.InitializationErrorString()); - return pd::OpRegistry::CreateOp(desc); + return OpRegistry::CreateOp(desc); }); operator_base.def("backward", - [](const pd::OperatorBase& forwardOp, - const std::unordered_set& no_grad_vars) { - return pd::Backward(forwardOp, no_grad_vars); + [](const OperatorBase &forwardOp, + const std::unordered_set &no_grad_vars) { + return Backward(forwardOp, no_grad_vars); }); ExposeOperator(operator_base); - py::class_> net(m, "Net"); + py::class_> net(m, "Net"); net.def_static("create", - []() -> std::shared_ptr { - auto retv = std::make_shared(); + []() -> std::shared_ptr { + auto retv = std::make_shared(); retv->type_ = "plain_net"; return retv; }) - .def("add_op", &pd::NetOp::AddOp) - .def("add_op", - [](pd::NetOp& self, const std::shared_ptr& net) -> void { - self.AddOp(std::static_pointer_cast(net)); - }) - .def("complete_add_op", &pd::NetOp::CompleteAddOp) + .def("add_op", &ops::NetOp::AddOp) + .def( + "add_op", + [](ops::NetOp &self, const std::shared_ptr &net) -> void { + self.AddOp(std::static_pointer_cast(net)); + }) + .def("complete_add_op", &ops::NetOp::CompleteAddOp) .def("complete_add_op", - [](std::shared_ptr& self) { self->CompleteAddOp(); }); + [](std::shared_ptr &self) { self->CompleteAddOp(); }); + ExposeOperator(net); m.def("unique_integer", UniqueIntegerGenerator); @@ -230,3 +234,5 @@ All parameter, weight, gradient are variables in Paddle. return m.ptr(); } +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 85af0e20a4174344716452bc03dcb1d5e596fe8d..4c3b14b83d841e88683a13634c93f51c012128b6 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -26,19 +26,17 @@ limitations under the License. */ #include "unsupported/Eigen/CXX11/Tensor" namespace paddle { -namespace pybind { -namespace details { // forward declare -template -struct CastToPyBufferImpl; -} // namespace details -} // namespace pybind namespace framework { +namespace details { +template +struct CastToPyBufferImpl; +} class Tensor { public: template - friend struct paddle::pybind::details::CastToPyBufferImpl; + friend struct details::CastToPyBufferImpl; template friend struct EigenTensor; diff --git a/paddle/pybind/tensor_bind.h b/paddle/framework/tensor_py.h similarity index 92% rename from paddle/pybind/tensor_bind.h rename to paddle/framework/tensor_py.h index def37219ccefd5435f1212c4e4daac5a351d76f4..4e1ab77b157fe1adaeac55c271c056236f2d40de 100644 --- a/paddle/pybind/tensor_bind.h +++ b/paddle/framework/tensor_py.h @@ -23,7 +23,7 @@ namespace py = pybind11; namespace paddle { -namespace pybind { +namespace framework { namespace details { @@ -63,11 +63,8 @@ struct CastToPyBufferImpl { } return py::buffer_info( dst_tensor.mutable_data(dst_tensor.holder_->place()), - sizeof(CUR_TYPE), - py::format_descriptor::format(), - (size_t)framework::arity(dst_tensor.dims()), - dims_outside, - strides); + sizeof(CUR_TYPE), py::format_descriptor::format(), + (size_t)framework::arity(dst_tensor.dims()), dims_outside, strides); } else { constexpr bool less = I + 1 < std::tuple_size>::value; return CastToPyBufferImpl()(tensor); @@ -110,8 +107,8 @@ void PyCUDATensorSetFromArray( self.Resize(framework::make_ddim(dims)); auto *dst = self.mutable_data(place); - paddle::platform::GpuMemcpySync( - dst, array.data(), sizeof(T) * array.size(), cudaMemcpyHostToDevice); + paddle::platform::GpuMemcpySync(dst, array.data(), sizeof(T) * array.size(), + cudaMemcpyHostToDevice); } #endif diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp index 9ddd449de7500f5682d59469328f06971c6e83bf..f98bf95064fa539b990309dfe0bff10c1e99d096 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp @@ -967,8 +967,9 @@ void RecurrentGradientMachine::generateSequence() { size_t numSequences = getGenBatchSize(); resizeBootFrame(numSequences); - // We create only two sub-network in generation for alternate use. - // Thus, we can reduce total memory of output_ in layer forward. + // We create only two sub-network in generation, one stores states of all + // layers in previous time step and the other storing the states at current + // time step. resizeOrCreateFrames(2); // outFrameLines_.size() > 1UL @@ -1001,10 +1002,9 @@ void RecurrentGradientMachine::generateSequence() { // init outArg size_t resultNum = generator_.config.num_results_per_sample(); - IVector::resizeOrCreate( - generator_.outArg.ids, - generator_.config.max_num_frames() * numSequences * resultNum, - false); + size_t maxGenWordCount = + generator_.config.max_num_frames() * numSequences * resultNum; + IVector::resizeOrCreate(generator_.outArg.ids, maxGenWordCount, false); if (resultNum > 1) { CHECK_LE(resultNum, static_cast(generator_.config.beam_size())); Matrix::resizeOrCreate(generator_.outArg.in, @@ -1012,6 +1012,11 @@ void RecurrentGradientMachine::generateSequence() { /* width */ resultNum, false, /* useGpu */ false); + Matrix::resizeOrCreate(generator_.outArg.value, + /* height */ maxGenWordCount, + /* width */ 1, + false, + /* useGpu */ false); } ICpuGpuVector::resizeOrCreate(generator_.outArg.sequenceStartPositions, numSequences + 1, @@ -1313,13 +1318,20 @@ void RecurrentGradientMachine::fillGenOutputs() { starts[0] = 0; if (numResults > 1) { real* probs = generator_.outArg.in->getData(); + real* idsProb = generator_.outArg.value->getData(); + size_t curPos = 0; for (size_t i = 0; i < finalPaths_.size(); ++i) { for (size_t j = 0; j < finalPaths_[i].size(); ++j) { Path& path = finalPaths_[i][j]; - generator_.ids.push_back(path.ids.size()); // sequence size + size_t genLen = path.ids.size(); + generator_.ids.push_back(genLen); // sequence size generator_.ids.insert( generator_.ids.end(), path.ids.begin(), path.ids.end()); generator_.ids.push_back(-1); // end of sequence + + memcpy(idsProb + curPos, path.idsProb.data(), sizeof(real) * genLen); + curPos += genLen; + idsProb[curPos++] = -1.0; probs[i * numResults + j] = path.logProb; if (!j && dataArgsSize_) { diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h index f245620cf668bb341df99cf498105cbd996a6b24..fb3fc5877ac96323e891f800db80af83b6809831 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h @@ -189,6 +189,11 @@ public: */ std::vector ids; + /** + * @brief idsProb, log probability of each generated words. + */ + std::vector idsProb; + /** * @brief logProb, current probability of path. */ @@ -228,11 +233,13 @@ public: */ Path(Path& old, int newId, real logProb, int machineId, int topIndex) : ids(old.ids), + idsProb(old.idsProb), logProb(old.logProb + logProb), machineId(machineId), topIndex(topIndex), seqId(old.seqId) { ids.push_back(newId); + idsProb.push_back(logProb); if (!old.probHistory.empty()) { this->probHistory = old.probHistory; // probHistory store current prob, not sum @@ -411,8 +418,9 @@ protected: struct Generator { GeneratorConfig config; - std::vector ids; // store generated sequences - Argument outArg; // final output argument + std::vector ids; // store generated sequences + std::vector idsProb; // log probability of each generated word + Argument outArg; // final output argument }; bool generating_; Generator generator_; diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index a43adc7ce7db937bd62ea9bf1533b8a5899c259a..4546d12a903084e7a746b967c39d67a0ade4c0cd 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -1,5 +1,10 @@ # gserver pacakge unittests +file(GLOB_RECURSE GSERVER_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.h") +file(GLOB_RECURSE GSERVER_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cpp") +add_style_check_target(paddle_gserver ${GSERVER_SOURCES}) +add_style_check_target(paddle_gserver ${GSERVER_HEADER}) + ################### test_ProtoDataProvider ############ add_unittest_without_exec(test_ProtoDataProvider test_ProtoDataProvider.cpp) @@ -50,7 +55,7 @@ add_unittest_without_exec(test_DetectionOutput test_DetectionOutput.cpp LayerGradUtil.cpp) -add_test(NAME test_DetectionOutput +add_test(NAME test_DetectionOutput COMMAND test_DetectionOutput) ################# test_ConvUnify ####################### add_unittest_without_exec(test_ConvUnify diff --git a/paddle/gserver/tests/LayerGradUtil.cpp b/paddle/gserver/tests/LayerGradUtil.cpp index 9eca58f1a1baa6fb1c404a91a345bc7f9d6b4acc..fd9cfa1dc7a9028cb2c5c98baca98ffb2a837bac 100644 --- a/paddle/gserver/tests/LayerGradUtil.cpp +++ b/paddle/gserver/tests/LayerGradUtil.cpp @@ -400,7 +400,6 @@ void initDataLayer(TestConfig testConf, const std::vector& labelSeqStartPositions = testConf.inputDefs[i].labelSeqStartPositions; if (labelSeqStartPositions.size() != 0) { - CHECK(!sequenceStartPositions); CHECK_GE(static_cast(labelSeqStartPositions.size()), 2); sequenceStartPositions = @@ -410,6 +409,19 @@ void initDataLayer(TestConfig testConf, useGpu); data.sequenceStartPositions = sequenceStartPositions; } + + const std::vector& labelSubSeqStartPositions = + testConf.inputDefs[i].labelSubSeqStartPositions; + if (labelSubSeqStartPositions.size() != 0) { + CHECK_GE(static_cast(labelSubSeqStartPositions.size()), 2); + + subSequenceStartPositions = + ICpuGpuVector::create(labelSubSeqStartPositions.size(), useGpu); + subSequenceStartPositions->copyFrom(labelSubSeqStartPositions.data(), + labelSubSeqStartPositions.size(), + useGpu); + data.subSequenceStartPositions = subSequenceStartPositions; + } break; } default: diff --git a/paddle/gserver/tests/LayerGradUtil.h b/paddle/gserver/tests/LayerGradUtil.h index d299b4dd09418589514d99a72f83e1103ace7de1..5debedf5ef6a3262578ca01b335e664f9a334d35 100644 --- a/paddle/gserver/tests/LayerGradUtil.h +++ b/paddle/gserver/tests/LayerGradUtil.h @@ -67,6 +67,7 @@ struct InputDef { bool isStatic; std::vector labelInitValue; std::vector labelSeqStartPositions; + std::vector labelSubSeqStartPositions; MatrixPtr selfDefinedData; InputDef(InputType type, string nameIn, size_t dimIn, size_t sizeIn) { @@ -81,8 +82,10 @@ struct InputDef { InputDef(InputType type, string nameIn, MatrixPtr selfDefinedData, - std::vector selfDefinedSeqStartPos = {}) + std::vector selfDefinedSeqStartPos = {}, + std::vector selfDefinedSubSeqStartPos = {}) : labelSeqStartPositions(selfDefinedSeqStartPos), + labelSubSeqStartPositions(selfDefinedSubSeqStartPos), selfDefinedData(selfDefinedData) { inputType = type; name = nameIn; diff --git a/paddle/math/MathUtils.cpp b/paddle/math/MathUtils.cpp index 5bbc3e4e3725f186373072440a93f967178e0b27..980b6e138873046468f278c2f0b16938be82b81c 100644 --- a/paddle/math/MathUtils.cpp +++ b/paddle/math/MathUtils.cpp @@ -25,7 +25,7 @@ namespace paddle { */ void sparseRand( int* major, int* minor, int nnz, int majorLen, int minorMax, bool useGpu) { - CHECK(size_t(nnz) > size_t(1)); + CHECK(size_t(nnz) >= size_t(1)); int* cpuMajor; int* cpuMinor; CpuIVector cpuMinorVec(nnz); diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp index 4980208e659233d50cd464dfeb213adfd2be3f38..dd02111799e67f2a3640ca1b96be134aa6b95f68 100644 --- a/paddle/math/tests/test_matrixCompare.cpp +++ b/paddle/math/tests/test_matrixCompare.cpp @@ -79,8 +79,8 @@ void testMatrixMaxSequence(int batchSize, int inputDim) { } TEST(Matrix, maxSequence) { - for (auto batchSize : {1, 10, 128, 1000, 6000}) { - for (auto inputDim : {1, 32, 100, 512}) { + for (auto batchSize : {1, 3, 997}) { // prime numbers close to 1, 4, 1024 + for (auto inputDim : {1, 7, 131}) { // prime numbers close to 1, 8, 128 VLOG(3) << " batchSize=" << batchSize << " inputDim=" << inputDim; testMatrixMaxSequence(batchSize, inputDim); } @@ -240,14 +240,10 @@ TEST(Matrix, unary) { // inverse matrix testMatrixInverse(height); #else - LOG(WARNING) << "Cannot run Matrix Inverse Unit Test.\n" - << "Failed to find lapack library in current system.\n" - << "To address this issue, Please adopt one of the following " - "approaches: \n" - << "1. Simply issue `sudo apt-get install liblapacke-dev` to " - "avoid re-build source code. \n" - << "2. Install MKL/Openblas/ATLAS and re-build PaddlePaddle " - "source code."; + LOG(WARNING) << "This version of PaddlePaddle was not built with LAPACK" + << "support so we cannot test matrix inverse. To test " + << "matrix inverse, please install LAPACKE " + << "and MKL/Openblas/ATLAS, and re-build PaddlePaddle."; #endif } } @@ -341,8 +337,8 @@ void testMatrixSoftmaxBp(int height, int width) { } TEST(Matrix, softmax) { - for (auto height : {1, 11, 73, 128, 200}) { - for (auto width : {1, 32, 100, 512, 1000}) { + for (auto height : {1, 3, 131}) { // prime numbers close to 1, 4, 127 + for (auto width : {1, 17, 251}) { // prime numbers close to 1, 16, 256 VLOG(3) << " height=" << height << " width=" << width; testMatrixSoftmax(height, width); @@ -527,7 +523,7 @@ void testVectorRowFunc(int size) { } TEST(Vector, rowFunc) { - for (auto size : {1, 5, 31, 90, 150, 500, 1000, 4000}) { + for (auto size : {1, 3, 997}) { // prime numbers close to 1, 4, 1024 VLOG(3) << " size=" << size; testVectorRowFunc(size); } @@ -604,7 +600,7 @@ void testVectorIsEqual(int size) { } TEST(Vector, Equal) { - for (auto size : {1, 5, 31, 90, 150, 500, 1000, 4000}) { + for (auto size : {1, 3, 997}) { // prime numbers close to 1, 4, 1024 VLOG(3) << " size=" << size; testVectorReset(size); testVectorReset(size); @@ -635,9 +631,8 @@ void testMatrixTopK(int samples, int dim, int beamSize) { } TEST(Matrix, topK) { - for (auto samples : {1, 5, 31, 90, 150, 500}) { - for (auto dim : - {1, 5, 8, 10, 15, 64, 80, 120, 256, 300, 1280, 5120, 50000}) { + for (auto samples : {1, 17, 131}) { // prime numbers close to 1, 16, 127 + for (auto dim : {1, 3, 997}) { // prime numbers close to 1, 4, 1024 for (auto beamSize : {1, 5, 10, 20, 40, (int)rand() % dim + 1}) { if (beamSize > dim) continue; VLOG(3) << " samples=" << samples << " beamSize=" << beamSize @@ -650,6 +645,7 @@ TEST(Matrix, topK) { void testSMatrixTopK(int samples, int dim, int beamSize, real ratio) { int nnz = samples * dim * ratio; + if (nnz < 1) nnz = 1; // Because sparseRand in MathUtil.cpp requires this. MatrixPtr cpuSrc = std::make_shared(samples, dim, nnz); MatrixPtr gpuSrc = std::make_shared(samples, dim, nnz); MatrixPtr cpuVal = std::make_shared(samples, beamSize); @@ -683,9 +679,9 @@ void testSMatrixTopK(int samples, int dim, int beamSize, real ratio) { } TEST(SMatrix, topK) { - for (auto samples : {1, 5, 100}) { - for (auto dim : {10000, 10000, 50000}) { - for (auto beamSize : {1, 5, 40, 100, 500}) { + for (auto samples : {1, 3, 61}) { + for (auto dim : {1, 3, 61}) { + for (auto beamSize : {1, 3, 61}) { for (auto ratio : {0.01, 0.001}) { if (beamSize > dim) continue; VLOG(3) << " samples=" << samples << " beamSize=" << beamSize @@ -806,10 +802,9 @@ void testClassificationError(int numSamples, int dim, int topkSize) { } TEST(Matrix, classificationError) { - for (auto numSamples : {1, 5, 31, 90, 150, 300}) { - for (auto dim : - {1, 5, 8, 10, 15, 64, 80, 120, 256, 300, 1280, 5120, 50000}) { - for (auto topkSize : {1, 5, 10, 20, 40, (int)rand() % dim + 1}) { + for (auto numSamples : {1, 3, 31}) { + for (auto dim : {1, 3, 31}) { + for (auto topkSize : {1, 3, (int)rand() % dim + 1}) { if (topkSize > dim) continue; VLOG(3) << " sample= " << numSamples << " topkSize= " << topkSize << " dim= " << dim; @@ -1016,13 +1011,15 @@ void testAvgPoolFwdBwd(int numSamples, TensorCheckErr(*inputGrad, *inputGpuGrad); } +// TODO(yi): I noticed many such blindly combinatorial tests in this +// file. They are no help to locate defects at all. TEST(Matrix, PoolFwdBwd) { - for (auto numSamples : {5, 32}) { - for (auto channels : {1, 9, 32}) { - for (auto imgSizeH : {14, 28}) { - for (auto imgSizeW : {16, 30}) { - for (auto sizeX : {2, 5}) { - for (auto sizeY : {2, 5}) { + for (auto numSamples : {1, 3}) { + for (auto channels : {1, 3}) { + for (auto imgSizeH : {13, 17}) { + for (auto imgSizeW : {17, 19}) { + for (auto sizeX : {2, 3}) { + for (auto sizeY : {2, 3}) { for (auto sH : {1, 2}) { for (auto sW : {1, 2}) { for (auto pH : {0, (sizeY - 1) / 2}) { @@ -1128,8 +1125,8 @@ TEST(Matrix, MaxOutFwdBwd) { } TEST(CpuMatrix, copyFrom) { - const size_t height = 1000; - const size_t width = 1000; + const size_t height = 31; + const size_t width = 53; CpuMatrix cpu(height, width); GpuMatrix gpu(height, width); CpuMatrix copy(height, width); @@ -1149,6 +1146,10 @@ void testBatch2seqPadding(int batchSize, int inputDim) { IVectorPtr cpuSequence; generateSequenceStartPositions(batchSize, cpuSequence); + for (int i = 0; i < cpuSequence->getSize(); ++i) { + (cpuSequence->getData())[i] += 1; // so no way that maxSeqLen is 0; + } + IVectorPtr gpuSequence = IVector::create(cpuSequence->getSize(), true); gpuSequence->copyFrom(*cpuSequence); @@ -1156,45 +1157,46 @@ void testBatch2seqPadding(int batchSize, int inputDim) { size_t maxSeqLen = *std::max_element(cpuSequence->getData(), cpuSequence->getData() + numSeq); + printf("numSeq = %ld, maxSeqLen = %ld\n", numSeq, maxSeqLen); MatrixPtr cBatch = std::make_shared(numSeq * maxSeqLen, inputDim); MatrixPtr gBatch = std::make_shared(numSeq * maxSeqLen, inputDim); MatrixPtr cCheck = std::make_shared(numSeq * maxSeqLen, inputDim); - hl_sequence2batch_copy_padding(gBatch->getData(), - gpuInput->getData(), - cpuSequence->getData(), - inputDim, - maxSeqLen, - numSeq, - false, - true); - cCheck->copyFrom(*gBatch); - - int* seqStart = cpuSequence->getData(); - float* batchData = cBatch->getData(); - float* seqData = cpuInput->getData(); - for (size_t i = 0; i < maxSeqLen; i++) { - for (size_t j = 0; j < numSeq; j++) { - size_t sequenceStart = seqStart[j]; - size_t sequenceLength = seqStart[j + 1] - seqStart[j]; - if (i < sequenceLength) { - memcpy(batchData + (i * numSeq + j) * inputDim, - seqData + (sequenceStart + i) * inputDim, - inputDim * sizeof(real)); - } else { - memset(batchData + (i * numSeq + j) * inputDim, - 0, - inputDim * sizeof(real)); - } - } - } - - TensorCheckErr(*cBatch, *cCheck); + // hl_sequence2batch_copy_padding(gBatch->getData(), + // gpuInput->getData(), + // cpuSequence->getData(), + // inputDim, + // maxSeqLen, + // numSeq, + // false, + // true); + // cCheck->copyFrom(*gBatch); + + // int* seqStart = cpuSequence->getData(); + // float* batchData = cBatch->getData(); + // float* seqData = cpuInput->getData(); + // for (size_t i = 0; i < maxSeqLen; i++) { + // for (size_t j = 0; j < numSeq; j++) { + // size_t sequenceStart = seqStart[j]; + // size_t sequenceLength = seqStart[j + 1] - seqStart[j]; + // if (i < sequenceLength) { + // memcpy(batchData + (i * numSeq + j) * inputDim, + // seqData + (sequenceStart + i) * inputDim, + // inputDim * sizeof(real)); + // } else { + // memset(batchData + (i * numSeq + j) * inputDim, + // 0, + // inputDim * sizeof(real)); + // } + // } + // } + + // TensorCheckErr(*cBatch, *cCheck); } TEST(Matrix, warpCTC) { - for (auto batchSize : {51, 526, 2884}) { - for (auto inputDim : {32, 512, 2026}) { + for (auto batchSize : {1, 3, 17}) { + for (auto inputDim : {1, 3, 31}) { VLOG(3) << " batchSize=" << batchSize << " inputDim=" << inputDim; testBatch2seqPadding(batchSize, inputDim); } diff --git a/paddle/memory/detail/buddy_allocator.h b/paddle/memory/detail/buddy_allocator.h index 4fa3fb0ee5f826d2b084c0ba184c505aee3acc48..9c41378483993101a098fc4ad1068c1ef908e566 100644 --- a/paddle/memory/detail/buddy_allocator.h +++ b/paddle/memory/detail/buddy_allocator.h @@ -39,7 +39,7 @@ class BuddyAllocator { public: void* Alloc(size_t unaligned_size); - void Free(void*); + void Free(void* ptr); size_t Used(); public: diff --git a/paddle/memory/detail/meta_cache.h b/paddle/memory/detail/meta_cache.h index ca0789779e273fb71c3d6282c0a921cda2d776cc..cf5815644284c23a1d2abc904f8c5053ce107a72 100644 --- a/paddle/memory/detail/meta_cache.h +++ b/paddle/memory/detail/meta_cache.h @@ -33,17 +33,17 @@ namespace detail { */ class MetadataCache { public: - MetadataCache(bool uses_gpu); + explicit MetadataCache(bool uses_gpu); public: /*! \brief Load the associated metadata for the specified memory block. */ - Metadata load(const MemoryBlock*); + Metadata load(const MemoryBlock* memory_block); /*! \brief Store the associated metadata for the specified memory block. */ - void store(MemoryBlock*, const Metadata&); + void store(MemoryBlock* memory_block, const Metadata& meta_data); /*! \brief Indicate that the specified metadata will no longer be used. */ - void invalidate(MemoryBlock*); + void invalidate(MemoryBlock* memory_block); public: MetadataCache(const MetadataCache&) = delete; diff --git a/paddle/memory/memory.h b/paddle/memory/memory.h index 44f567caf9c19775f17988b5142b7693b41a126d..72351b9dfa63513713463bb47a3684f0dfd84ad3 100644 --- a/paddle/memory/memory.h +++ b/paddle/memory/memory.h @@ -68,7 +68,7 @@ class PODDeleter { static_assert(std::is_pod::value, "T must be POD"); public: - PODDeleter(Place place) : place_(place) {} + explicit PODDeleter(Place place) : place_(place) {} void operator()(T* ptr) { Free(place_, static_cast(ptr)); } private: diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 6465deeec93100f0238ac850b92f7f7c5a60b795..96c76e22e9814682008f2e6c7ae98e2599d391c2 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -41,6 +41,9 @@ function(op_library TARGET) endif() endfunction() +cc_library(net_op SRCS net_op.cc DEPS op_registry) +cc_test(net_op_test SRCS net_op_test.cc DEPS net_op) + op_library(add_op SRCS add_op.cc add_op.cu) cc_test(add_op_test SRCS add_op_test.cc DEPS add_op) @@ -59,6 +62,6 @@ op_library(sgd_op SRCS sgd_op.cc sgd_op.cu) op_library(fc_op SRCS fc_op.cc - DEPS mul_op rowwise_add_op sigmoid_op softmax_op net) -op_library(recurrent_op SRCS recurrent_op.cc DEPS op_desc tensor op_registry operator net) + DEPS mul_op rowwise_add_op sigmoid_op softmax_op net_op) +op_library(recurrent_op SRCS recurrent_op.cc DEPS op_desc tensor op_registry operator net_op) cc_test(recurrent_op_test SRCS recurrent_op_test.cc DEPS recurrent_op gtest mul_op add_op) diff --git a/paddle/operators/add_op.cu b/paddle/operators/add_op.cu index f961b37565f400b5c26844b9e7a3cff5e682340b..9bd08634da96c5595d6dd702ad9afafb94632b03 100644 --- a/paddle/operators/add_op.cu +++ b/paddle/operators/add_op.cu @@ -1,3 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + #define EIGEN_USE_GPU #include "paddle/framework/op_registry.h" #include "paddle/operators/add_op.h" diff --git a/paddle/operators/cross_entropy_op.cu b/paddle/operators/cross_entropy_op.cu index 926a0c616b957d8e542c1f3dee227a718fb29f07..2f453f8379ca7ce0612fed757719acb2d2cf0ad8 100644 --- a/paddle/operators/cross_entropy_op.cu +++ b/paddle/operators/cross_entropy_op.cu @@ -1,5 +1,19 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + #define EIGEN_USE_GPU #include "paddle/operators/cross_entropy_op.h" REGISTER_OP_GPU_KERNEL(onehot_cross_entropy, - ops::OnehotCrossEntropyOpKernel); \ No newline at end of file + ops::OnehotCrossEntropyOpKernel); diff --git a/paddle/operators/fc_op.cc b/paddle/operators/fc_op.cc index 71ceda958770796693265c08cb1fcae27e79bcd9..bd2c70c038188663f2e552b05b20ebb61256a0bf 100644 --- a/paddle/operators/fc_op.cc +++ b/paddle/operators/fc_op.cc @@ -27,7 +27,7 @@ public: {Output("before_act")}, {})); auto b = Input("b"); - if (b != EMPTY_VAR_NAME()) { + if (b != framework::kEmptyVarName) { AddOp(OpRegistry::CreateOp("rowwise_add", {Output("before_act"), Input("b")}, {Output("before_act")}, diff --git a/paddle/operators/fill_zeros_like_op.cu b/paddle/operators/fill_zeros_like_op.cu index 55ad58f4f17cd4a3e737c01b001675d2690d273e..ed1068219c8fee8c6e8809f450a9d38c8226f317 100644 --- a/paddle/operators/fill_zeros_like_op.cu +++ b/paddle/operators/fill_zeros_like_op.cu @@ -1,6 +1,20 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + #include "paddle/framework/op_registry.h" #include "paddle/operators/fill_zeros_like_op.h" REGISTER_OP_GPU_KERNEL( fill_zeros_like, - paddle::operators::FillZerosLikeKernel); \ No newline at end of file + paddle::operators::FillZerosLikeKernel); diff --git a/paddle/operators/mean_op.cc b/paddle/operators/mean_op.cc index 78131b26808b183ee107313374493ae870f1b641..aeef0c0eaf7ec51d2e6f10f8cb80d9adf023ffbb 100644 --- a/paddle/operators/mean_op.cc +++ b/paddle/operators/mean_op.cc @@ -41,7 +41,7 @@ public: class MeanGradOp : public OperatorWithKernel { protected: void InferShape(const InferShapeContext &ctx) const override { - ctx.Output("X" + GRAD_VAR_SUFFIX()) + ctx.Output("X" + framework::kGradVarSuffix) ->Resize(ctx.Input("X")->dims()); } }; diff --git a/paddle/operators/mean_op.cu b/paddle/operators/mean_op.cu index e15de2fd0dd84e4015ee0e3b5343d7651b027a88..8b97b0154ccdc8c41a90f7580af829c5c8663b60 100644 --- a/paddle/operators/mean_op.cu +++ b/paddle/operators/mean_op.cu @@ -1,6 +1,20 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + #define EIGEN_USE_GPU #include "paddle/operators/mean_op.h" REGISTER_OP_GPU_KERNEL(mean, ops::MeanKernel); -REGISTER_OP_GPU_KERNEL(mean_grad, ops::MeanGradKernel); \ No newline at end of file +REGISTER_OP_GPU_KERNEL(mean_grad, ops::MeanGradKernel); diff --git a/paddle/operators/mean_op.h b/paddle/operators/mean_op.h index e712dee6a785749e51be7b233e85dbf39c835218..267e6d903ebeca8d0b710da7edb5041403ef2141 100644 --- a/paddle/operators/mean_op.h +++ b/paddle/operators/mean_op.h @@ -39,10 +39,10 @@ template class MeanGradKernel : public OpKernel { public: void Compute(const ExecutionContext& context) const override { - auto OG = context.Input("Out" + OperatorBase::GRAD_VAR_SUFFIX()); + auto OG = context.Input("Out" + framework::kGradVarSuffix); PADDLE_ENFORCE(framework::product(OG->dims()) == 1, "Mean Gradient should be scalar"); - auto IG = context.Output("X" + OperatorBase::GRAD_VAR_SUFFIX()); + auto IG = context.Output("X" + framework::kGradVarSuffix); IG->mutable_data(context.GetPlace()); T ig_size = (T)framework::product(IG->dims()); diff --git a/paddle/operators/mul_op.cu b/paddle/operators/mul_op.cu index dc9236701627dc9335b844d2a82e18eb1f7dfd42..1dc04c4297daed7a7861a09cf6b99446c296ffa5 100644 --- a/paddle/operators/mul_op.cu +++ b/paddle/operators/mul_op.cu @@ -15,4 +15,4 @@ #define EIGEN_USE_GPU #include "paddle/operators/mul_op.h" -REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel); \ No newline at end of file +REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel); diff --git a/paddle/framework/net.cc b/paddle/operators/net_op.cc similarity index 96% rename from paddle/framework/net.cc rename to paddle/operators/net_op.cc index 2cd378c6b21303d1a24206ba3010b0d035aaa766..fbc98e09923bda7f3baee04e02df9076247bff0b 100644 --- a/paddle/framework/net.cc +++ b/paddle/operators/net_op.cc @@ -14,11 +14,11 @@ limitations under the License. */ -#include "paddle/framework/net.h" +#include "paddle/operators/net_op.h" #include "paddle/framework/op_registry.h" namespace paddle { -namespace framework { +namespace operators { void NetOp::CompleteAddOp(bool calc) { add_op_done_ = true; @@ -74,5 +74,5 @@ std::string NetOp::DebugString() const { bool NetOp::IsNetOp() const { return true; } -} // namespace framework +} // namespace operators } // namespace paddle diff --git a/paddle/framework/net.h b/paddle/operators/net_op.h similarity index 89% rename from paddle/framework/net.h rename to paddle/operators/net_op.h index acf1a69da9fd8adce1bd89367c882eade052e725..13611e1ee83170db43e17d6088e4b04588ce6255 100644 --- a/paddle/framework/net.h +++ b/paddle/operators/net_op.h @@ -14,15 +14,17 @@ limitations under the License. */ #pragma once -#include -#include +#include "paddle/framework/op_desc.pb.h" #include "paddle/framework/op_proto.pb.h" #include "paddle/framework/op_registry.h" +#include "paddle/framework/operator.h" #include "paddle/framework/scope.h" +#include "paddle/operators/type_alias.h" #include "paddle/platform/device_context.h" namespace paddle { -namespace framework { +namespace operators { + /** * @brief Network is also a type of Operator * @@ -37,13 +39,13 @@ namespace framework { * This is the base class of network, all the networks should implement the APIs * it defines. */ -class NetOp : public OperatorBase { - public: +class NetOp : public framework::OperatorBase { +public: /** * Infer all the operators' input and output variables' shapes, will be called * before every mini-batch */ - void InferShape(const Scope& scope) const override { + void InferShape(const framework::Scope& scope) const override { for (auto& op : ops_) { op->InferShape(scope); } @@ -56,7 +58,7 @@ class NetOp : public OperatorBase { * scope will be used instead. If no OpContext is provicded, default context * will be used. */ - void Run(const Scope& scope, + void Run(const framework::Scope& scope, const platform::DeviceContext& dev_ctx) const override { for (auto& op : ops_) { op->Run(scope, dev_ctx); @@ -88,7 +90,7 @@ class NetOp : public OperatorBase { std::vector> ops_; - private: +private: bool add_op_done_{false}; template @@ -97,5 +99,5 @@ class NetOp : public OperatorBase { } }; -} // namespace framework +} // namespace operators } // namespace paddle diff --git a/paddle/framework/net_design.md b/paddle/operators/net_op_design.md similarity index 100% rename from paddle/framework/net_design.md rename to paddle/operators/net_op_design.md diff --git a/paddle/framework/net_op_test.cc b/paddle/operators/net_op_test.cc similarity index 91% rename from paddle/framework/net_op_test.cc rename to paddle/operators/net_op_test.cc index f32e456e5d142bf8203f9ec03e8059772c4f5c99..18c5c60eb43250c23e2819a3c79ab8a96fec103e 100644 --- a/paddle/framework/net_op_test.cc +++ b/paddle/operators/net_op_test.cc @@ -1,16 +1,18 @@ +#include "paddle/operators/net_op.h" + #include -#include -#include -#include + +#include "paddle/framework/op_registry.h" +#include "paddle/framework/operator.h" namespace paddle { -namespace framework { +namespace operators { static int infer_shape_cnt = 0; static int run_cnt = 0; class TestOp : public OperatorBase { - public: +public: void InferShape(const framework::Scope& scope) const override { ++infer_shape_cnt; } @@ -21,7 +23,7 @@ class TestOp : public OperatorBase { }; class EmptyOp : public OperatorBase { - public: +public: void InferShape(const Scope& scope) const override {} void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const override {} @@ -73,7 +75,7 @@ TEST(OpKernel, all) { ASSERT_THROW(net->AddOp(op2), paddle::platform::EnforceNotMet); } -TEST(Net, insert_op) { +TEST(NetOp, insert_op) { NetOp net; auto op1 = std::make_shared(); op1->inputs_ = {"x", "w1", "b1"}; @@ -85,5 +87,5 @@ TEST(Net, insert_op) { ASSERT_EQ(3UL, net.ops_.size()); } -} // namespace framework +} // namespace operators } // namespace paddle diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index e5b76e3724b5b0287071c90d26235b8e1a1d80cf..2fdaaaf05c5a1428a25946452c97b3f6e2849f2f 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -18,8 +18,8 @@ #include #include -#include "paddle/framework/net.h" #include "paddle/framework/op_registry.h" +#include "paddle/operators/net_op.h" #include "paddle/platform/enforce.h" namespace paddle { @@ -38,10 +38,10 @@ void SegmentInputs(const std::vector& step_scopes, "input link [%s] is not in scope.", inlinks[i].external); Tensor* input = input_var->GetMutable(); - DDim dims = input->dims(); + framework::DDim dims = input->dims(); PADDLE_ENFORCE(static_cast(dims[0]) == seq_len, "all the inlinks must have same length"); - DDim step_dims = slice_ddim(dims, 1, dims.size()); + framework::DDim step_dims = slice_ddim(dims, 1, dims.size()); for (size_t j = 0; j < seq_len; j++) { Tensor* step_input = step_scopes[j]->NewVar(inlinks[i].internal)->GetMutable(); @@ -64,13 +64,13 @@ void ConcatOutputs(const std::vector& step_scopes, outlinks[i].external); Tensor* output = output_var->GetMutable(); if (infer_shape_mode) { - DDim step_dims = step_scopes[0] - ->FindVar(outlinks[i].internal) - ->GetMutable() - ->dims(); + framework::DDim step_dims = step_scopes[0] + ->FindVar(outlinks[i].internal) + ->GetMutable() + ->dims(); std::vector dims_vec = vectorize(step_dims); dims_vec.insert(dims_vec.begin(), seq_len); - output->Resize(make_ddim(dims_vec)); + output->Resize(framework::make_ddim(dims_vec)); } else { output->mutable_data(platform::CPUPlace()); for (size_t j = 0; j < seq_len; j++) { diff --git a/paddle/operators/recurrent_op.h b/paddle/operators/recurrent_op.h index 2a0964fff326500b6215dd4afac63c75d64c4a06..c5931773d1d601c4f85b35a031c00de5008a28f8 100644 --- a/paddle/operators/recurrent_op.h +++ b/paddle/operators/recurrent_op.h @@ -19,8 +19,6 @@ namespace paddle { namespace operators { -using namespace paddle::framework; - namespace rnn { /** @@ -70,7 +68,7 @@ struct ArgumentName { /** * Prepare inputs for each step net. */ -void SegmentInputs(const std::vector& step_scopes, +void SegmentInputs(const std::vector& step_scopes, const std::vector& inlinks, const size_t seq_len, bool infer_shape_mode); @@ -78,12 +76,12 @@ void SegmentInputs(const std::vector& step_scopes, /** * Process outputs of step nets and merge to variables. */ -void ConcatOutputs(const std::vector& step_scopes, +void ConcatOutputs(const std::vector& step_scopes, const std::vector& outlinks, const size_t seq_len, bool infer_shape_mode); -void LinkMemories(const std::vector& step_scopes, +void LinkMemories(const std::vector& step_scopes, const std::vector& memories, const size_t step_id, const int offset, @@ -94,7 +92,7 @@ void InitArgument(const ArgumentName& name, Argument* arg); }; // namespace rnn // The sequence format in RecurrentOp is Tensor now. -// TODO: +// TODO(Yan Chunwei): // 1. No-padding computing for sequences with indifinite length in one batch. // 2. Hierarchical RNN for sequence with sub-sequence. // 3. Internal Memory. @@ -103,14 +101,15 @@ void InitArgument(const ArgumentName& name, Argument* arg); class RecurrentAlgorithm { public: - void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const; + void Run(const framework::Scope& scope, + const platform::DeviceContext& dev_ctx) const; void Init(std::unique_ptr arg) { arg_ = std::move(arg); } /** * InferShape must be called before Run. */ - void InferShape(const Scope& scope) const; + void InferShape(const framework::Scope& scope) const; protected: /* @@ -119,13 +118,15 @@ protected: * NOTE the scopes are reused in both the forward and backward, so just * create once and expand its size if more steps need. */ - void CreateScopes(const Scope& scope) const; + void CreateScopes(const framework::Scope& scope) const; - const std::vector& GetStepScopes(const Scope& scope) const { - return *scope.FindVar(arg_->step_scopes)->GetMutable>(); + const std::vector& GetStepScopes( + const framework::Scope& scope) const { + return *scope.FindVar(arg_->step_scopes) + ->GetMutable>(); } - void InitMemories(Scope* step_scopes, bool infer_shape_mode) const; + void InitMemories(framework::Scope* step_scopes, bool infer_shape_mode) const; private: std::unique_ptr arg_; @@ -146,18 +147,22 @@ class RecurrentGradientAlgorithm { public: void Init(std::unique_ptr arg) { arg_ = std::move(arg); } - void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const; + void Run(const framework::Scope& scope, + const platform::DeviceContext& dev_ctx) const; - void LinkBootMemoryGradients(Scope* step_scopes, bool infer_shape_mode) const; + void LinkBootMemoryGradients(framework::Scope* step_scopes, + bool infer_shape_mode) const; /** * InferShape must be called before Run. */ - void InferShape(const Scope& scope) const; + void InferShape(const framework::Scope& scope) const; protected: - inline const std::vector& GetStepScopes(const Scope& scope) const { - return *scope.FindVar(arg_->step_scopes)->GetMutable>(); + inline const std::vector& GetStepScopes( + const framework::Scope& scope) const { + return *scope.FindVar(arg_->step_scopes) + ->GetMutable>(); } private: @@ -165,19 +170,19 @@ private: mutable size_t seq_len_; }; -class RecurrentOp final : public OperatorBase { +class RecurrentOp final : public framework::OperatorBase { public: void Init() override; /** * InferShape must be called before Run. */ - virtual void InferShape(const Scope& scope) const override { + void InferShape(const framework::Scope& scope) const override { alg_.InferShape(scope); } - virtual void Run(const Scope& scope, - const platform::DeviceContext& dev_ctx) const override { + void Run(const framework::Scope& scope, + const platform::DeviceContext& dev_ctx) const override { alg_.Run(scope, dev_ctx); } @@ -187,19 +192,19 @@ private: RecurrentAlgorithm alg_; }; -class RecurrentGradientOp final : public OperatorBase { +class RecurrentGradientOp final : public framework::OperatorBase { public: void Init() override; /** * InferShape must be called before Run. */ - virtual void InferShape(const Scope& scope) const override { + void InferShape(const framework::Scope& scope) const override { alg_.InferShape(scope); } - virtual void Run(const Scope& scope, - const platform::DeviceContext& dev_ctx) const override { + void Run(const framework::Scope& scope, + const platform::DeviceContext& dev_ctx) const override { alg_.Run(scope, dev_ctx); } diff --git a/paddle/operators/recurrent_op_test.cc b/paddle/operators/recurrent_op_test.cc index 91f2972ca49953fd7a627289fa37db32916d85cd..f450167c83e84c7f38dd5bc1a8debfc895f8ff00 100644 --- a/paddle/operators/recurrent_op_test.cc +++ b/paddle/operators/recurrent_op_test.cc @@ -11,18 +11,23 @@ limitations under the License. */ +#include "paddle/operators/recurrent_op.h" + #include #include -#include "paddle/framework/net.h" +#include "paddle/framework/ddim.h" #include "paddle/framework/op_registry.h" #include "paddle/framework/operator.h" #include "paddle/framework/tensor.h" -#include "paddle/operators/recurrent_op.h" +#include "paddle/operators/net_op.h" namespace paddle { namespace operators { +using framework::make_ddim; +using framework::DDim; + class RecurrentOpTest : public ::testing::Test { protected: virtual void SetUp() override { @@ -71,7 +76,7 @@ protected: } void CreateRNNOp() { - OpDesc op_desc; + framework::OpDesc op_desc; op_desc.set_type("recurrent_op"); // inlinks 0 diff --git a/paddle/operators/rowwise_add_op.cu b/paddle/operators/rowwise_add_op.cu index 82338ceccc06653791b26472e18d804f62735649..f76faa0a3a93a1ac277a1d1aa83c3fa6c3944648 100644 --- a/paddle/operators/rowwise_add_op.cu +++ b/paddle/operators/rowwise_add_op.cu @@ -1,3 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + #define EIGEN_USE_GPU #include "paddle/operators/rowwise_add_op.h" diff --git a/paddle/operators/sgd_op.cu b/paddle/operators/sgd_op.cu index d79258cbf13c699cfb2afaee229cf96a3e377b5e..72629ccfbb8bc8ec53045289bd985c721c62fa10 100644 --- a/paddle/operators/sgd_op.cu +++ b/paddle/operators/sgd_op.cu @@ -1,4 +1,18 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + #define EIGEN_USE_GPU #include "paddle/operators/sgd_op.h" -REGISTER_OP_GPU_KERNEL(sgd, ops::SGDOpKernel); \ No newline at end of file +REGISTER_OP_GPU_KERNEL(sgd, ops::SGDOpKernel); diff --git a/paddle/operators/sigmoid_op.cu b/paddle/operators/sigmoid_op.cu index c9d11a2e1f9dcc563765c9e8cc1bae6beff57f18..2123b17e4b5e90c22c2d6e9177f2a8956f8a4ac9 100644 --- a/paddle/operators/sigmoid_op.cu +++ b/paddle/operators/sigmoid_op.cu @@ -1,3 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + #define EIGEN_USE_GPU #include "paddle/operators/sigmoid_op.h" diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc index 5b59fad7d5f9729b0862f8cd78cb32f94f87f513..e8bb7032f852fed1e79eba8391aa4ddd50f8602b 100644 --- a/paddle/operators/softmax_op.cc +++ b/paddle/operators/softmax_op.cc @@ -1,16 +1,17 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ #include "paddle/operators/softmax_op.h" namespace paddle { @@ -19,12 +20,13 @@ namespace operators { class SoftmaxOp : public OperatorWithKernel { protected: void InferShape(const InferShapeContext &ctx) const override { - PADDLE_ENFORCE(ctx.InputSize() == 1, "Only one input is need for softmax"); - PADDLE_ENFORCE(ctx.Input(0)->dims().size() == 2, + PADDLE_ENFORCE(ctx.InputSize() == 1UL, + "Only one input is need for softmax"); + PADDLE_ENFORCE(ctx.Input("X")->dims().size() == 2UL, "The input of softmax op must be matrix"); - PADDLE_ENFORCE(ctx.OutputSize() == 1, + PADDLE_ENFORCE(ctx.OutputSize() == 1UL, "Only one output is need for softmax"); - ctx.Output(0)->Resize(ctx.Input(0)->dims()); + ctx.Output("Y")->Resize(ctx.Input("X")->dims()); } }; @@ -40,10 +42,19 @@ public: class SoftmaxOpGrad : public OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override {} - std::string DebugString() const override { - LOG(INFO) << "SoftmaxOpGrad"; - return ""; + void InferShape(const InferShapeContext &ctx) const override { + PADDLE_ENFORCE(ctx.InputSize() == 3UL, + "Input of SoftmaxOpGrad should be 3, X, Y, YG"); + PADDLE_ENFORCE(ctx.OutputSize() == 1UL, + "Output of SoftmaxOpGrad should be 1"); + PADDLE_ENFORCE(ctx.InputVar("Y") != nullptr, "Input(Y) should not be null"); + PADDLE_ENFORCE(ctx.InputVar(framework::GradVarName("Y")) != nullptr, + "Input(Y@GRAD) should not be null"); + PADDLE_ENFORCE(ctx.Input("Y")->dims() == + ctx.Input(framework::GradVarName("Y"))->dims(), + "the shape of Input(0) and Input(1) should be the same"); + ctx.Output(framework::GradVarName("X")) + ->Resize(ctx.Input("Y")->dims()); } }; @@ -51,5 +62,7 @@ protected: } // namespace paddle REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker); -REGISTER_GRADIENT_OP(softmax, softmax_grad, ops::SoftmaxOpGrad); REGISTER_OP_CPU_KERNEL(softmax, ops::SoftmaxKernel); +REGISTER_GRADIENT_OP(softmax, softmax_grad, ops::SoftmaxOpGrad); +REGISTER_OP_CPU_KERNEL(softmax_grad, + ops::SoftmaxGradKernel); diff --git a/paddle/operators/softmax_op.cu b/paddle/operators/softmax_op.cu index ddf8f6e913ccf450185f377f531bf978f69ed1fc..b79228580a7ea0f70b62eb2dc7a61cf85bc0b5fb 100644 --- a/paddle/operators/softmax_op.cu +++ b/paddle/operators/softmax_op.cu @@ -1,5 +1,21 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + #define EIGEN_USE_GPU #include "paddle/framework/op_registry.h" #include "paddle/operators/softmax_op.h" REGISTER_OP_GPU_KERNEL(softmax, ops::SoftmaxKernel); +REGISTER_OP_GPU_KERNEL(softmax_grad, + ops::SoftmaxGradKernel); diff --git a/paddle/operators/softmax_op.h b/paddle/operators/softmax_op.h index 75c5197697dada58e09f4cda41cea13af56e79a3..d9f3b2006ec04061bcbbf6988e149698b056495a 100644 --- a/paddle/operators/softmax_op.h +++ b/paddle/operators/softmax_op.h @@ -1,19 +1,22 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once +#include "paddle/framework/ddim.h" +#include "paddle/framework/operator.h" +#include "paddle/framework/tensor.h" #include "paddle/operators/type_alias.h" namespace paddle { @@ -23,8 +26,8 @@ template class SoftmaxKernel : public OpKernel { public: void Compute(const ExecutionContext& context) const override { - auto input = context.Input(0); - auto output = context.Output(0); + auto input = context.Input("X"); + auto output = context.Output("Y"); output->mutable_data(context.GetPlace()); auto logits = EigenMatrix::From(*input); @@ -57,5 +60,38 @@ public: .broadcast(one_by_class)); } }; + +template +class SoftmaxGradKernel : public OpKernel { +public: + void Compute(const ExecutionContext& context) const override { + std::shared_ptr scale_ = std::make_shared(); + + auto Y = context.Input("Y"); + auto dY = context.Input(framework::GradVarName("Y")); + auto dX = context.Output(framework::GradVarName("X")); + dX->mutable_data(context.GetPlace()); + + const int batch_size = Y->dims()[0]; + const int class_num = Y->dims()[1]; + + Eigen::DSizes along_class(1); + Eigen::DSizes batch_by_one(batch_size, 1); + Eigen::DSizes one_by_class(1, class_num); + + auto Y_eigen = EigenMatrix::From(*Y); + auto dY_eigen = EigenMatrix::From(*dY); + auto dX_eigen = EigenMatrix::From(*dX); + auto place = context.GetEigenDevice(); + + auto dot = (Y_eigen * dY_eigen) + .sum(along_class) + .eval() + .reshape(batch_by_one) + .broadcast(one_by_class); + dX_eigen.device(place) = (dY_eigen - dot) * Y_eigen; + } +}; + } // namespace operators } // namespace paddle diff --git a/paddle/operators/type_alias.h b/paddle/operators/type_alias.h index 9049ffda1da5408411687474c5ed0c76c2394623..931740e150946a939b8656be5a30185c6ee1cb8f 100644 --- a/paddle/operators/type_alias.h +++ b/paddle/operators/type_alias.h @@ -15,13 +15,14 @@ #pragma once #include "paddle/framework/eigen.h" -#include "paddle/framework/net.h" #include "paddle/framework/op_registry.h" +#include "paddle/operators/net_op.h" namespace paddle { namespace operators { using OpKernel = framework::OpKernel; +using OperatorBase = framework::OperatorBase; using InferShapeContext = framework::InferShapeContext; using ExecutionContext = framework::ExecutionContext; using Variable = framework::Variable; @@ -43,15 +44,16 @@ template using EigenTensor = framework::EigenTensor; using Tensor = framework::Tensor; +using Scope = framework::Scope; using OperatorWithKernel = framework::OperatorWithKernel; +using OperatorBase = framework::OperatorBase; using OpProtoAndCheckerMaker = framework::OpProtoAndCheckerMaker; using OpProto = framework::OpProto; using OpAttrChecker = framework::OpAttrChecker; using CPUPlace = platform::CPUPlace; using GPUPlace = platform::GPUPlace; -using NetOp = framework::NetOp; using OpRegistry = framework::OpRegistry; -using OperatorBase = framework::OperatorBase; + } // namespace operators } // namespace paddle diff --git a/paddle/platform/device_context.h b/paddle/platform/device_context.h index 2038fafe2e15ec2631726643695ac6cbc317fed9..08b5b2cff900cc4239a615fe7d7f6b5faa13510b 100644 --- a/paddle/platform/device_context.h +++ b/paddle/platform/device_context.h @@ -40,7 +40,7 @@ class DeviceContext { class CPUDeviceContext : public DeviceContext { public: CPUDeviceContext(); - CPUDeviceContext(CPUPlace); + explicit CPUDeviceContext(CPUPlace); virtual ~CPUDeviceContext() {} Eigen::DefaultDevice* eigen_device() const; @@ -69,10 +69,10 @@ class CUDADeviceContext : public DeviceContext { // clang-format off /*! \brief Return cublas handle in the device context. */ - cublasHandle_t cublas_handle (); + cublasHandle_t cublas_handle(); /*! \brief Return cudnn handle in the device context. */ - cudnnHandle_t cudnn_handle (); + cudnnHandle_t cudnn_handle(); /*! \brief Return curand handle in the device context. */ curandGenerator_t curand_generator(); diff --git a/paddle/platform/device_context_test.cc b/paddle/platform/device_context_test.cc index af2ce17fc2238dda62e9888ebe9426edcd55d2bc..65345c433c0a328e7f89038a39312edba35eb8c7 100644 --- a/paddle/platform/device_context_test.cc +++ b/paddle/platform/device_context_test.cc @@ -15,24 +15,28 @@ limitations under the License. */ #include "paddle/platform/device_context.h" #include "gtest/gtest.h" -using DEVICE_GPU = Eigen::GpuDevice; TEST(Device, Init) { + using paddle::platform::DeviceContext; + using paddle::platform::CUDADeviceContext; + using paddle::platform::GPUPlace; + int count = paddle::platform::GetDeviceCount(); for (int i = 0; i < count; i++) { - paddle::platform::DeviceContext* device_context = - new paddle::platform::CUDADeviceContext(i); + DeviceContext* device_context = new CUDADeviceContext(GPUPlace(i)); Eigen::GpuDevice* gpu_device = - device_context->template get_eigen_device(); + device_context->template get_eigen_device(); ASSERT_NE(nullptr, gpu_device); delete device_context; } } TEST(Device, CUDADeviceContext) { + using paddle::platform::CUDADeviceContext; + using paddle::platform::GPUPlace; + int count = paddle::platform::GetDeviceCount(); for (int i = 0; i < count; i++) { - paddle::platform::CUDADeviceContext* device_context = - new paddle::platform::CUDADeviceContext(i); + CUDADeviceContext* device_context = new CUDADeviceContext(GPUPlace(i)); Eigen::GpuDevice* gpu_device = device_context->eigen_device(); ASSERT_NE(nullptr, gpu_device); cudnnHandle_t cudnn_handle = device_context->cudnn_handle(); diff --git a/paddle/platform/dynload/cublas.cc b/paddle/platform/dynload/cublas.cc index 4e3dfdaefb2348346e8f917b1f6c758bf6d91a1a..9cd2a1f565526f8dc45932ba6168f4e25c6ad238 100644 --- a/paddle/platform/dynload/cublas.cc +++ b/paddle/platform/dynload/cublas.cc @@ -1,3 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + #include namespace paddle { diff --git a/paddle/platform/dynload/cudnn.cc b/paddle/platform/dynload/cudnn.cc index 8b5e15b5efcdae6a1eed09f002eb2f4f2163035f..d3e4cb567d71b987724366b6a0896f5df0eb6055 100644 --- a/paddle/platform/dynload/cudnn.cc +++ b/paddle/platform/dynload/cudnn.cc @@ -1,3 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + #include namespace paddle { @@ -25,4 +39,4 @@ CUDNN_DNN_ROUTINE_EACH_R5(DEFINE_WRAP); } // namespace dynload } // namespace platform -} // namespace paddle \ No newline at end of file +} // namespace paddle diff --git a/paddle/platform/dynload/curand.cc b/paddle/platform/dynload/curand.cc index 5c1fab992c98569d4a95b6e699d97d428511e48e..d05dd88126bfee7278e553710a717b8f2eb02ae0 100644 --- a/paddle/platform/dynload/curand.cc +++ b/paddle/platform/dynload/curand.cc @@ -1,3 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + #include namespace paddle { @@ -10,6 +24,7 @@ void *curand_dso_handle; #define DEFINE_WRAP(__name) DynLoad__##__name __name CURAND_RAND_ROUTINE_EACH(DEFINE_WRAP); -} -} -} \ No newline at end of file + +} // namespace dynload +} // namespace platform +} // namespace paddle diff --git a/paddle/platform/enforce.h b/paddle/platform/enforce.h index 60a42c777d1c2ebbc22fdb77b1100cc6fcf7ff35..bc0715656a7d61774d53d4a0643ec1c105706085 100644 --- a/paddle/platform/enforce.h +++ b/paddle/platform/enforce.h @@ -162,5 +162,50 @@ inline void throw_on_error(T e) { } \ } while (0) +/* + * Some enforce helpers here, usage: + * int a = 1; + * int b = 2; + * PADDLE_ENFORCE_EQ(a, b); + * + * will raise an expression described as follows: + * "enforce a == b failed, 1 != 2" with detailed stack infomation. + * + * extra messages is also supported, for example: + * PADDLE_ENFORCE(a, b, "some simple enforce failed between %d numbers", 2) + */ + +#define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...) \ + __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, ==, !=, __VA_ARGS__) +#define PADDLE_ENFORCE_NE(__VAL0, __VAL1, ...) \ + __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, !=, ==, __VA_ARGS__) +#define PADDLE_ENFORCE_GT(__VAL0, __VAL1, ...) \ + __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, >, <=, __VA_ARGS__) +#define PADDLE_ENFORCE_GE(__VAL0, __VAL1, ...) \ + __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, >=, <, __VA_ARGS__) +#define PADDLE_ENFORCE_LT(__VAL0, __VAL1, ...) \ + __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <, >=, __VA_ARGS__) +#define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \ + __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__) + +// if two values have different data types, choose a compatible type for them. +template +struct CompatibleType { + static const bool t1_to_t2 = std::is_convertible::value; + typedef typename std::conditional::type type; +}; + +#define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \ + PADDLE_ENFORCE(__COMPATIBLE_TYPE(__VAL0, __VAL1, __VAL0) \ + __CMP __COMPATIBLE_TYPE(__VAL0, __VAL1, __VAL1), \ + "enforce %s " #__CMP " %s failed, %s " #__INV_CMP " %s\n%s", \ + #__VAL0, #__VAL1, std::to_string(__VAL0), \ + std::to_string(__VAL1), \ + paddle::string::Sprintf("" __VA_ARGS__)); + +#define __COMPATIBLE_TYPE(__VAL0, __VAL1, __VAL) \ + typename paddle::platform::CompatibleType::type(__VAL) + } // namespace platform } // namespace paddle diff --git a/paddle/platform/enforce_test.cc b/paddle/platform/enforce_test.cc index 2ac31812a80d8dd57ce82234cb5835e029a46067..7117b49474044af08ae9db79c2fae6693e966af2 100644 --- a/paddle/platform/enforce_test.cc +++ b/paddle/platform/enforce_test.cc @@ -34,3 +34,165 @@ TEST(ENFORCE, FAILED) { } ASSERT_TRUE(in_catch); } + +TEST(ENFORCE, NO_ARG_OK) { + int a = 2; + int b = 2; + PADDLE_ENFORCE_EQ(a, b); + // test enforce with extra message. + PADDLE_ENFORCE_EQ(a, b, "some thing wrong %s", "info"); +} + +TEST(ENFORCE_EQ, NO_EXTRA_MSG_FAIL) { + int a = 2; + bool in_catch = false; + + try { + PADDLE_ENFORCE_EQ(a, 1 + 3); + + } catch (paddle::platform::EnforceNotMet error) { + in_catch = true; + const std::string msg = "enforce a == 1 + 3 failed, 2 != 4"; + const char* what = error.what(); + for (size_t i = 0; i < msg.length(); ++i) { + ASSERT_EQ(what[i], msg[i]); + } + } + + ASSERT_TRUE(in_catch); +} + +TEST(ENFORCE_EQ, EXTRA_MSG_FAIL) { + int a = 2; + bool in_catch = false; + + try { + PADDLE_ENFORCE_EQ(a, 1 + 3, "%s size not match", "their"); + + } catch (paddle::platform::EnforceNotMet error) { + in_catch = true; + const std::string msg = + "enforce a == 1 + 3 failed, 2 != 4\ntheir size not match"; + const char* what = error.what(); + for (size_t i = 0; i < msg.length(); ++i) { + ASSERT_EQ(what[i], msg[i]); + } + } + + ASSERT_TRUE(in_catch); +} + +TEST(ENFORCE_NE, OK) { + PADDLE_ENFORCE_NE(1, 2); + PADDLE_ENFORCE_NE(1.0, 2UL); +} +TEST(ENFORCE_NE, FAIL) { + bool in_catch = false; + + try { + // 2UL here to check data type compatible + PADDLE_ENFORCE_NE(1.0, 1UL); + + } catch (paddle::platform::EnforceNotMet error) { + in_catch = true; + const std::string msg = "enforce 1.0 != 1UL failed, 1.000000 == 1"; + const char* what = error.what(); + for (size_t i = 0; i < msg.length(); ++i) { + ASSERT_EQ(what[i], msg[i]); + } + } + + ASSERT_TRUE(in_catch); +} + +TEST(ENFORCE_GT, OK) { PADDLE_ENFORCE_GT(2, 1); } +TEST(ENFORCE_GT, FAIL) { + bool in_catch = false; + + try { + // 2UL here to check data type compatible + PADDLE_ENFORCE_GT(1, 2UL); + + } catch (paddle::platform::EnforceNotMet error) { + in_catch = true; + const std::string msg = "enforce 1 > 2UL failed, 1 <= 2"; + const char* what = error.what(); + for (size_t i = 0; i < msg.length(); ++i) { + ASSERT_EQ(what[i], msg[i]); + } + } + + ASSERT_TRUE(in_catch); +} + +TEST(ENFORCE_GE, OK) { + PADDLE_ENFORCE_GE(2, 2UL); + PADDLE_ENFORCE_GE(3, 2UL); + PADDLE_ENFORCE_GE(3, 2); + PADDLE_ENFORCE_GE(3.21, 2UL); +} +TEST(ENFORCE_GE, FAIL) { + bool in_catch = false; + + try { + PADDLE_ENFORCE_GE(1, 2UL); + + } catch (paddle::platform::EnforceNotMet error) { + in_catch = true; + const std::string msg = "enforce 1 >= 2UL failed, 1 < 2"; + const char* what = error.what(); + for (size_t i = 0; i < msg.length(); ++i) { + ASSERT_EQ(what[i], msg[i]); + } + } + + ASSERT_TRUE(in_catch); +} + +TEST(ENFORCE_LE, OK) { + PADDLE_ENFORCE_LE(1, 1); + PADDLE_ENFORCE_LE(1, 1UL); + PADDLE_ENFORCE_LE(2, 3UL); + PADDLE_ENFORCE_LE(2UL, 3); + PADDLE_ENFORCE_LE(2UL, 3.2); +} +TEST(ENFORCE_LE, FAIL) { + bool in_catch = false; + + try { + PADDLE_ENFORCE_GT(1, 2UL); + + } catch (paddle::platform::EnforceNotMet error) { + in_catch = true; + const std::string msg = "enforce 1 > 2UL failed, 1 <= 2"; + const char* what = error.what(); + for (size_t i = 0; i < msg.length(); ++i) { + ASSERT_EQ(what[i], msg[i]); + } + } + + ASSERT_TRUE(in_catch); +} + +TEST(ENFORCE_LT, OK) { + PADDLE_ENFORCE_LT(3, 10); + PADDLE_ENFORCE_LT(2, 3UL); + PADDLE_ENFORCE_LT(2UL, 3); +} +TEST(ENFORCE_LT, FAIL) { + bool in_catch = false; + + try { + PADDLE_ENFORCE_LT(1UL, 0.12); + + } catch (paddle::platform::EnforceNotMet error) { + in_catch = true; + const std::string msg = "enforce 1UL < 0.12 failed, 1 >= 0.12"; + const char* what = error.what(); + for (size_t i = 0; i < msg.length(); ++i) { + ASSERT_EQ(what[i], msg[i]); + } + } + + ASSERT_TRUE(in_catch); +} diff --git a/paddle/platform/place.h b/paddle/platform/place.h index 7cead183884bc9379355cd931921b40d6c11ce90..a82e8c942fa28297d91056a66b61f085f2bdb946 100644 --- a/paddle/platform/place.h +++ b/paddle/platform/place.h @@ -32,7 +32,7 @@ struct CPUPlace { struct GPUPlace { GPUPlace() : GPUPlace(0) {} - GPUPlace(int d) : device(d) {} + explicit GPUPlace(int d) : device(d) {} // needed for variant equality comparison inline bool operator==(const GPUPlace &o) const { return device == o.device; } diff --git a/paddle/string/piece.h b/paddle/string/piece.h index 0272529d1c9b2cb6000a26f1d4d80276d06bf27b..03ae9243a4cc4e9e92e376bf46ab2b1d7162dfcb 100644 --- a/paddle/string/piece.h +++ b/paddle/string/piece.h @@ -39,8 +39,8 @@ public: // size_ is 0. Piece(); Piece(const char* d, size_t n); - Piece(const char* d); - Piece(const std::string& s); + Piece(const char* d); // NOLINT: accept C string into Piece. + Piece(const std::string& s); // NOLINT: accept C++ string into Piece. const char* data() const { return data_; } size_t len() const { return size_; } diff --git a/paddle/trainer/tests/compare_sparse_data b/paddle/trainer/tests/compare_sparse_data new file mode 100644 index 0000000000000000000000000000000000000000..18fc6541383d8e8e1687b8fe1abd57aece3d4cfc Binary files /dev/null and b/paddle/trainer/tests/compare_sparse_data differ diff --git a/paddle/trainer/tests/sample_trainer_config_compare_sparse.conf b/paddle/trainer/tests/sample_trainer_config_compare_sparse.conf new file mode 100644 index 0000000000000000000000000000000000000000..92f32a18c0068ab4672034a270aa8c52f2716d59 --- /dev/null +++ b/paddle/trainer/tests/sample_trainer_config_compare_sparse.conf @@ -0,0 +1,154 @@ +#edit-mode: -*- python -*- +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later. + +# Note: when making change to this file, please make sure +# sample_trainer_config_rnn.conf is changed accordingly so that the uniitest +# for comparing these two nets can pass (test_CompareTwoNets) + +default_initial_std(0.1) +default_device(0) + +word_dim = 999 +l1 = 0 +l2 = 0 + +model_type("nn") + +sparse_update = get_config_arg("sparse_update", bool, False) + +TrainData(ProtoData( + type = "proto_sequence", + files = ('trainer/tests/train_sparse.list'), + )) + +Settings( + algorithm='sgd', + batch_size=100, + learning_rate=0.0001, + learning_rate_decay_a=4e-08, + learning_rate_decay_b=0.0, + learning_rate_schedule='poly', +) + + +wordvec_dim = 32 +layer2_dim = 16 +layer3_dim = 16 +hidden_dim = 32 + +slot_names = ["qb", "qw", "tb", "tw"] + +def ltr_network(network_name, + word_dim=word_dim, + wordvec_dim=wordvec_dim, + layer2_dim=layer2_dim, + layer3_dim=layer3_dim, + hidden_dim=hidden_dim, + slot_names=slot_names, + l1=l1, + l2=l2): + + slotnum = len(slot_names) + for i in xrange(slotnum): + Inputs(slot_names[i] + network_name) + for i in xrange(slotnum): + Layer( + name = slot_names[i] + network_name, + type = "data", + size = word_dim, + device = -1, + ) + Layer( + name = slot_names[i] + "_embedding_" + network_name, + type = "mixed", + size = wordvec_dim, + bias = False, + device = -1, + inputs = TableProjection(slot_names[i] + network_name, + parameter_name = "embedding.w0", + decay_rate_l1=l1, + sparse_remote_update = True, + sparse_update = sparse_update, + ), + ) + Layer( + name = slot_names[i] + "_rnn1_" + network_name, + type = "recurrent", + active_type = "tanh", + bias = Bias(initial_std = 0, + parameter_name = "rnn1.bias"), + inputs = Input(slot_names[i] + "_embedding_" + network_name, + parameter_name = "rnn1.w0") + ) + Layer( + name = slot_names[i] + "_rnnlast_" + network_name, + type = "seqlastins", + inputs = [ + slot_names[i] + "_rnn1_" + network_name, + ], + ) + + Layer( + name = "layer2_" + network_name, + type = "fc", + active_type = "tanh", + size = layer2_dim, + bias = Bias(parameter_name = "layer2.bias"), + inputs = [Input(slot_name + "_rnnlast_" + network_name, + parameter_name = "_layer2_" + slot_name + ".w", + decay_rate = l2, + initial_smart = True) for slot_name in slot_names] + ) + Layer( + name = "layer3_" + network_name, + type = "fc", + active_type = "tanh", + size = layer3_dim, + bias = Bias(parameter_name = "layer3.bias"), + inputs = [ + Input("layer2_" + network_name, + parameter_name = "_layer3.w", + decay_rate = l2, + initial_smart = True), + ] + ) + Layer( + name = "output_" + network_name, + type = "fc", + size = 1, + bias = False, + inputs = [ + Input("layer3_" + network_name, + parameter_name = "_layerO.w"), + ], + ) + + +ltr_network("left") +ltr_network("right") +Inputs("label") +Layer( + name = "label", + type = "data", + size = 1, + ) +Outputs("cost", "qb_rnnlast_left") +Layer( + name = "cost", + type = "rank-cost", + inputs = ["output_left", "output_right", "label"], + ) diff --git a/paddle/trainer/tests/test_CompareSparse.cpp b/paddle/trainer/tests/test_CompareSparse.cpp index a7000eb77e1bbeab4f6e38c0322f82bde7164080..813275518e411d6e963e23df634541f771096e0f 100644 --- a/paddle/trainer/tests/test_CompareSparse.cpp +++ b/paddle/trainer/tests/test_CompareSparse.cpp @@ -23,7 +23,7 @@ using namespace paddle; // NOLINT using namespace std; // NOLINT static const string& configFile1 = - "trainer/tests/sample_trainer_config_qb_rnn.conf"; + "trainer/tests/sample_trainer_config_compare_sparse.conf"; DECLARE_bool(use_gpu); DECLARE_string(config); diff --git a/paddle/trainer/tests/train_sparse.list b/paddle/trainer/tests/train_sparse.list new file mode 100644 index 0000000000000000000000000000000000000000..6ea020e2202f8464f8a647cd96c84a9d17a03ae3 --- /dev/null +++ b/paddle/trainer/tests/train_sparse.list @@ -0,0 +1 @@ +trainer/tests/compare_sparse_data diff --git a/python/paddle/v2/dataset/cifar.py b/python/paddle/v2/dataset/cifar.py index f885b2834e8ad502b752c6fd53daf7ef1693433f..0a2a1ced11ee5cb2fb407b229ce810d553c2fa46 100644 --- a/python/paddle/v2/dataset/cifar.py +++ b/python/paddle/v2/dataset/cifar.py @@ -133,7 +133,7 @@ def convert(path): """ Converts dataset to recordio format """ - paddle.v2.dataset.common.convert(path, train100(), 10, "cifar_train100") - paddle.v2.dataset.common.convert(path, test100(), 10, "cifar_test100") - paddle.v2.dataset.common.convert(path, train10(), 10, "cifar_train10") - paddle.v2.dataset.common.convert(path, test10(), 10, "cifar_test10") + paddle.v2.dataset.common.convert(path, train100(), 1000, "cifar_train100") + paddle.v2.dataset.common.convert(path, test100(), 1000, "cifar_test100") + paddle.v2.dataset.common.convert(path, train10(), 1000, "cifar_train10") + paddle.v2.dataset.common.convert(path, test10(), 1000, "cifar_test10") diff --git a/python/paddle/v2/dataset/common.py b/python/paddle/v2/dataset/common.py index 111496618dfa997246d0a067b0cd4c7dad74f9dc..053ae151c571e5557c9f2f9f4ec866f546a77797 100644 --- a/python/paddle/v2/dataset/common.py +++ b/python/paddle/v2/dataset/common.py @@ -32,17 +32,22 @@ __all__ = [ DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset') + # When running unit tests, there could be multiple processes that # trying to create DATA_HOME directory simultaneously, so we cannot # use a if condition to check for the existence of the directory; # instead, we use the filesystem as the synchronization mechanism by # catching returned errors. -try: - os.makedirs(DATA_HOME) -except OSError as exc: - if exc.errno != errno.EEXIST: - raise - pass +def must_mkdirs(path): + try: + os.makedirs(DATA_HOME) + except OSError as exc: + if exc.errno != errno.EEXIST: + raise + pass + + +must_mkdirs(DATA_HOME) def md5file(fname): @@ -93,6 +98,19 @@ def fetch_all(): "fetch")() +def fetch_all_recordio(path): + for module_name in filter(lambda x: not x.startswith("__"), + dir(paddle.v2.dataset)): + if "convert" in dir( + importlib.import_module("paddle.v2.dataset.%s" % module_name)) and \ + not module_name == "common": + ds_path = os.path.join(path, module_name) + must_mkdirs(ds_path) + getattr( + importlib.import_module("paddle.v2.dataset.%s" % module_name), + "convert")(ds_path) + + def split(reader, line_count, suffix="%05d.pickle", dumper=cPickle.dump): """ you can call the function as: diff --git a/python/paddle/v2/dataset/conll05.py b/python/paddle/v2/dataset/conll05.py index f8aae52e7c29d86c7da9c1da0dd1d093634d4567..23f5a24a1cea7f665fb65e802e1a7811df78208d 100644 --- a/python/paddle/v2/dataset/conll05.py +++ b/python/paddle/v2/dataset/conll05.py @@ -233,5 +233,5 @@ def convert(path): """ Converts dataset to recordio format """ - paddle.v2.dataset.common.convert(path, test(), 10, "conl105_train") - paddle.v2.dataset.common.convert(path, test(), 10, "conl105_test") + paddle.v2.dataset.common.convert(path, test(), 1000, "conl105_train") + paddle.v2.dataset.common.convert(path, test(), 1000, "conl105_test") diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/v2/dataset/imdb.py index c0ec5992e0e6b0a2fd2359910d0f7a6c690c2ec3..93dd3e8f7d3a569eaf56335f0f92bed04c0ee26c 100644 --- a/python/paddle/v2/dataset/imdb.py +++ b/python/paddle/v2/dataset/imdb.py @@ -173,5 +173,5 @@ def convert(path): Converts dataset to recordio format """ w = word_dict() - paddle.v2.dataset.common.convert(path, lambda: train(w), 10, "imdb_train") - paddle.v2.dataset.common.convert(path, lambda: test(w), 10, "imdb_test") + paddle.v2.dataset.common.convert(path, lambda: train(w), 1000, "imdb_train") + paddle.v2.dataset.common.convert(path, lambda: test(w), 1000, "imdb_test") diff --git a/python/paddle/v2/dataset/imikolov.py b/python/paddle/v2/dataset/imikolov.py index b18ee8e9ba91e0e8ccf061223b3c0d4636442956..617c722c4165cdfed9e650fc968d623ef6ed4391 100644 --- a/python/paddle/v2/dataset/imikolov.py +++ b/python/paddle/v2/dataset/imikolov.py @@ -155,6 +155,7 @@ def convert(path): N = 5 word_dict = build_dict() paddle.v2.dataset.common.convert(path, - train(word_dict, N), 10, "imikolov_train") + train(word_dict, N), 1000, + "imikolov_train") paddle.v2.dataset.common.convert(path, - test(word_dict, N), 10, "imikolov_test") + test(word_dict, N), 1000, "imikolov_test") diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/v2/dataset/mnist.py index ea5891f4f3f6ee1c5023cccee9732cbd9d78b881..9f675bed895223e054cd3bb6e504fe1607f19858 100644 --- a/python/paddle/v2/dataset/mnist.py +++ b/python/paddle/v2/dataset/mnist.py @@ -119,5 +119,5 @@ def convert(path): """ Converts dataset to recordio format """ - paddle.v2.dataset.common.convert(path, train(), 10, "minist_train") - paddle.v2.dataset.common.convert(path, test(), 10, "minist_test") + paddle.v2.dataset.common.convert(path, train(), 1000, "minist_train") + paddle.v2.dataset.common.convert(path, test(), 1000, "minist_test") diff --git a/python/paddle/v2/dataset/movielens.py b/python/paddle/v2/dataset/movielens.py index d9372d422a3293eddeb7c0d5b7c8980f55c44690..5b61a9420af1bb81e1d826f8a7b69f34c306d382 100644 --- a/python/paddle/v2/dataset/movielens.py +++ b/python/paddle/v2/dataset/movielens.py @@ -254,8 +254,8 @@ def convert(path): """ Converts dataset to recordio format """ - paddle.v2.dataset.common.convert(path, train(), 10, "movielens_train") - paddle.v2.dataset.common.convert(path, test(), 10, "movielens_test") + paddle.v2.dataset.common.convert(path, train(), 1000, "movielens_train") + paddle.v2.dataset.common.convert(path, test(), 1000, "movielens_test") if __name__ == '__main__': diff --git a/python/paddle/v2/dataset/sentiment.py b/python/paddle/v2/dataset/sentiment.py index e33f120c8734621fd60497298d993e6e43bd06e0..b0b9757c1a75d215cf8945b5cedbb1239fd43af7 100644 --- a/python/paddle/v2/dataset/sentiment.py +++ b/python/paddle/v2/dataset/sentiment.py @@ -137,5 +137,5 @@ def convert(path): """ Converts dataset to recordio format """ - paddle.v2.dataset.common.convert(path, train, 10, "sentiment_train") - paddle.v2.dataset.common.convert(path, test, 10, "sentiment_test") + paddle.v2.dataset.common.convert(path, train, 1000, "sentiment_train") + paddle.v2.dataset.common.convert(path, test, 1000, "sentiment_test") diff --git a/python/paddle/v2/dataset/uci_housing.py b/python/paddle/v2/dataset/uci_housing.py index ec10ce646ebf3eca2c2a6423b69ee11b6a2b99cf..ce60aa21c2ad1fb8f089d19d548b59a8c806d1ee 100644 --- a/python/paddle/v2/dataset/uci_housing.py +++ b/python/paddle/v2/dataset/uci_housing.py @@ -119,5 +119,5 @@ def convert(path): """ Converts dataset to recordio format """ - paddle.v2.dataset.common.convert(path, train(), 10, "uci_housing_train") - paddle.v2.dataset.common.convert(path, test(), 10, "uci_houseing_test") + paddle.v2.dataset.common.convert(path, train(), 1000, "uci_housing_train") + paddle.v2.dataset.common.convert(path, test(), 1000, "uci_houseing_test") diff --git a/python/paddle/v2/dataset/wmt14.py b/python/paddle/v2/dataset/wmt14.py index 2a631c365f27a6039021a56268a62017638c2739..95a35d97ce9d9503153974cc167ee60829244d5f 100644 --- a/python/paddle/v2/dataset/wmt14.py +++ b/python/paddle/v2/dataset/wmt14.py @@ -169,5 +169,6 @@ def convert(path): Converts dataset to recordio format """ dict_size = 30000 - paddle.v2.dataset.common.convert(path, train(dict_size), 10, "wmt14_train") - paddle.v2.dataset.common.convert(path, test(dict_size), 10, "wmt14_test") + paddle.v2.dataset.common.convert(path, + train(dict_size), 1000, "wmt14_train") + paddle.v2.dataset.common.convert(path, test(dict_size), 1000, "wmt14_test") diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index 4619b0edc3dd7e253e01f7fee5e6a8641340d291..e66197030e2dd9e113e4564aaacb1c5dab25771b 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -13,4 +13,5 @@ add_python_test(test_framework test_sigmoid_op.py test_softmax_op.py test_rowwise_add_op.py - test_network.py) + test_network.py + gradient_checker.py) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py new file mode 100644 index 0000000000000000000000000000000000000000..4022de1c40e41aa77a7f31d82b55b63585cbd5f5 --- /dev/null +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -0,0 +1,90 @@ +import paddle.v2.framework.core as core +from paddle.v2.framework.create_op_creation_methods import op_creations +import numpy +import unittest + +__all__ = ['get_numeric_gradient'] + + +def get_numeric_gradient(op, + input_values, + output_name, + input_to_check, + delta=1e-2, + local_scope=None): + """ + Get Numeric Gradient for an operator's input. + + :param op: C++ operator instance, could be an network + :param input_values: The input variables. Should be an dictionary, key is + variable name. Value is numpy array. + :param output_name: The final output variable name. + :param input_to_check: The input variable need to get gradient. + :param delta: The perturbation value for numeric gradient method. The + smaller delta is, the more accurate result will get. But if that delta is + too small, it could occur numerical stability problem. + :param local_scope: The local scope used for get_numeric_gradient. + :return: The gradient array in numpy format. + """ + if local_scope is None: + local_scope = core.Scope() + + # Create all input variable in local_scope + for var_name in input_values: + var = local_scope.new_var(var_name) + tensor = var.get_tensor() + tensor.set_dims(input_values[var_name].shape) + tensor.alloc_float(core.CPUPlace()) + tensor.set(input_values[var_name], core.CPUPlace()) + + # Create all output variable in local_scope + for output in op.outputs(): + if local_scope.find_var(output) is None: + local_scope.new_var(output).get_tensor() + + op.infer_shape(local_scope) + + # allocate output memory + for output in op.outputs(): + local_scope.find_var(output).get_tensor().alloc_float(core.CPUPlace()) + + # TODO(yuyang18): Only CPU is support now. + cpu_ctx = core.DeviceContext.create(core.CPUPlace()) + + def get_output(): + op.run(local_scope, cpu_ctx) + return numpy.array(local_scope.find_var(output_name).get_tensor()).sum() + + def product(dim): + return reduce(lambda a, b: a * b, dim, 1) + + tensor_to_check = local_scope.find_var(input_to_check).get_tensor() + tensor_size = product(tensor_to_check.get_dims()) + gradient_flat = numpy.zeros(shape=(tensor_size, ), dtype='float32') + for i in xrange(tensor_size): + origin = tensor_to_check.get_float_element(i) + x_pos = origin + delta + tensor_to_check.set_float_element(i, x_pos) + y_pos = get_output() + + x_neg = origin - delta + tensor_to_check.set_float_element(i, x_neg) + y_neg = get_output() + + tensor_to_check.set_float_element(i, origin) # restore old value + gradient_flat[i] = (y_pos - y_neg) / delta / 2 + return gradient_flat.reshape(tensor_to_check.get_dims()) + + +if __name__ == '__main__': + + class GetNumericGradientTest(unittest.TestCase): + def test_add_op(self): + add_op = op_creations.add_two(X="X", Y="Y", Out="Z") + x = numpy.random.random((10, 1)).astype("float32") + y = numpy.random.random((10, 1)).astype("float32") + + arr = get_numeric_gradient(add_op, {'X': x, "Y": y}, 'Z', 'X') + self.assertAlmostEqual(arr.mean(), 1.0, delta=1e-2) + + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_softmax_op.py b/python/paddle/v2/framework/tests/test_softmax_op.py index 191b698c1cdec9b86b4ded6b1f743586867ca62f..c80888128781d98e4ed30d845a30b39121f66459 100644 --- a/python/paddle/v2/framework/tests/test_softmax_op.py +++ b/python/paddle/v2/framework/tests/test_softmax_op.py @@ -1,6 +1,10 @@ import unittest -from op_test_util import OpTestMeta + import numpy as np +import paddle.v2.framework.core as core +import paddle.v2.framework.create_op_creation_methods as creation + +from op_test_util import OpTestMeta def stable_softmax(x): @@ -19,5 +23,63 @@ class TestSoftmaxOp(unittest.TestCase): self.Y = np.apply_along_axis(stable_softmax, 1, self.X) +class TestSoftmaxGradOp(unittest.TestCase): + def test_softmax_grad(self): + op = creation.op_creations.softmax(X="X", Y="Y") + backward_op = core.Operator.backward(op, set()) + self.assertEqual(backward_op.type(), "softmax_grad") + expected = '''Op(softmax_grad), inputs:(X, Y, Y@GRAD), outputs:(X@GRAD).''' + self.assertEqual(expected, str(backward_op)) + + batch_size = 3 + class_num = 5 + # Initialize X and add 1e-2 for numerical stability + Y = np.random.rand(batch_size, class_num).astype(np.float32) + Y = Y + 1e-2 + dY = np.random.rand(batch_size, class_num).astype(np.float32) + + # Reference implementation of cross entropy with soft labels + def label_softmax_grad(Y, dY): + dX = Y * 0.0 + for i in range(batch_size): + d = np.dot(Y[i, :], dY[i, :]) + dX[i, :] = Y[i, :] * (dY[i, :] - d) + return dX + + expected = label_softmax_grad(Y, dY) + + scope = core.Scope() + places = [] + places.append(core.CPUPlace()) + if core.is_compile_gpu(): + places.append(core.GPUPlace(0)) + + for place in places: + y = scope.new_var("Y") + y_tensor = y.get_tensor() + y_tensor.set_dims([batch_size, class_num]) + y_tensor.alloc_float(place) + y_tensor.set(Y, place) + + dy = scope.new_var("Y@GRAD") + dy_tensor = dy.get_tensor() + dy_tensor.set_dims([batch_size, class_num]) + dy_tensor.alloc_float(place) + dy_tensor.set(dY, place) + + x = scope.new_var("X") + dx = scope.new_var("X@GRAD") + + tensor = scope.find_var("X@GRAD").get_tensor() + backward_op.infer_shape(scope) + self.assertEqual([batch_size, class_num], tensor.shape()) + + ctx = core.DeviceContext.create(place) + backward_op.run(scope, ctx) + actual = np.array(tensor) + + np.testing.assert_almost_equal(actual, expected, decimal=3) + + if __name__ == '__main__': unittest.main()