diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake index 6320b17520a687f88993b6f464d9115838b0f96b..52a22c1fbf4779fa3c0ca687cab664bd3ca0410a 100644 --- a/cmake/cblas.cmake +++ b/cmake/cblas.cmake @@ -62,29 +62,33 @@ endif() ## Then find the reference-cblas. www.netlib.org/blas/ - - set(REFERENCE_CBLAS_ROOT $ENV{REFERENCE_CBLAS_ROOT} CACHE PATH "Folder contains reference-cblas") -set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS - ${REFERENCE_CBLAS_ROOT}/include - /usr/include - /usr/include/cblas -) - -set(REFERENCE_CBLAS_LIB_SEARCH_PATHS - ${REFERENCE_CBLAS_ROOT}/lib - /usr/lib - /usr/lib/blas/reference/ - /usr/lib/reference/ -) +if(NOT CMAKE_CROSSCOMPILING) + set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS + ${REFERENCE_CBLAS_ROOT}/include + /usr/include + /usr/include/cblas + ) + + set(REFERENCE_CBLAS_LIB_SEARCH_PATHS + ${REFERENCE_CBLAS_ROOT}/lib + /usr/lib + /usr/lib/blas/reference/ + /usr/lib/reference/ + ) +else() + # Diable the finding of reference cblas under host's system path + set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/include) + set(REFERENCE_CBLAS_LIB_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/lib) +endif() find_path(REFERENCE_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS ${REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS}) find_library(REFERENCE_CBLAS_LIBRARY NAMES cblas PATHS ${REFERENCE_CBLAS_LIB_SEARCH_PATHS}) -if (REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY) +if(REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY) set(CBLAS_FOUND ON) set(CBLAS_PROVIDER REFERENCE) set(CBLAS_INC_DIR ${REFERENCE_CBLAS_INCLUDE_DIR}) diff --git a/cmake/external/grpc.cmake b/cmake/external/grpc.cmake index 0853b981813c5d60a12603471df7e0b216b0822f..aa249159470773241e0f6da2e8e086264634dd4a 100644 --- a/cmake/external/grpc.cmake +++ b/cmake/external/grpc.cmake @@ -24,16 +24,16 @@ SET(GRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/grpc) SET(GRPC_INCLUDE_DIR "${GRPC_INSTALL_DIR}/include/" CACHE PATH "grpc include directory." FORCE) SET(GRPC_CPP_PLUGIN "${GRPC_INSTALL_DIR}/bin/grpc_cpp_plugin" CACHE FILEPATH "GRPC_CPP_PLUGIN" FORCE) IF(APPLE) - SET(BUILD_CMD make -n HAS_SYSTEM_PROTOBUF=false -s -j8 static grpc_cpp_plugin | sed "s/-Werror//g" | sh) + SET(BUILD_CMD make -n HAS_SYSTEM_PROTOBUF=false -s -j static grpc_cpp_plugin | sed "s/-Werror//g" | sh) ELSE() - SET(BUILD_CMD make HAS_SYSTEM_PROTOBUF=false -s -j8 static grpc_cpp_plugin) + SET(BUILD_CMD make HAS_SYSTEM_PROTOBUF=false -s -j static grpc_cpp_plugin) ENDIF() ExternalProject_Add( extern_grpc DEPENDS protobuf zlib GIT_REPOSITORY "https://github.com/grpc/grpc.git" - GIT_TAG "v1.8.x" + GIT_TAG "v1.11.x" PREFIX ${GRPC_SOURCES_DIR} UPDATE_COMMAND "" CONFIGURE_COMMAND "" diff --git a/cmake/external/snappy.cmake b/cmake/external/snappy.cmake index 71f54c425d4c38e271a8f1b78887d95a27252443..80282329c6ac65fbd1493a6838efca4bd9cadaad 100644 --- a/cmake/external/snappy.cmake +++ b/cmake/external/snappy.cmake @@ -11,19 +11,20 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# -IF(MOBILE_INFERENCE) +if(MOBILE_INFERENCE OR RPI) return() -ENDIF() +endif() include (ExternalProject) # NOTE: snappy is needed when linking with recordio -SET(SNAPPY_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy) -SET(SNAPPY_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy) -SET(SNAPPY_INCLUDE_DIR "${SNAPPY_INSTALL_DIR}/include/" CACHE PATH "snappy include directory." FORCE) +set(SNAPPY_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy) +set(SNAPPY_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy) +set(SNAPPY_INCLUDE_DIR "${SNAPPY_INSTALL_DIR}/include" CACHE PATH "snappy include directory." FORCE) + +set(SNAPPY_LIBRARIES "${SNAPPY_INSTALL_DIR}/lib/libsnappy.a") ExternalProject_Add( extern_snappy @@ -51,8 +52,7 @@ ExternalProject_Add( ) add_library(snappy STATIC IMPORTED GLOBAL) -set_property(TARGET snappy PROPERTY IMPORTED_LOCATION - "${SNAPPY_INSTALL_DIR}/lib/libsnappy.a") +set_property(TARGET snappy PROPERTY IMPORTED_LOCATION ${SNAPPY_LIBRARIES}) include_directories(${SNAPPY_INCLUDE_DIR}) add_dependencies(snappy extern_snappy) diff --git a/cmake/external/snappystream.cmake b/cmake/external/snappystream.cmake index 8f7a3bf8eeaef75c8840f4ea318b484d33249bb7..20a96430823d07a07d4bb4602e7fc0cfe55c3bf2 100644 --- a/cmake/external/snappystream.cmake +++ b/cmake/external/snappystream.cmake @@ -11,9 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# -IF(MOBILE_INFERENCE) +IF(MOBILE_INFERENCE OR RPI) return() ENDIF() @@ -21,9 +20,11 @@ include (ExternalProject) # NOTE: snappy is needed when linking with recordio -SET(SNAPPYSTREAM_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy_stream) -SET(SNAPPYSTREAM_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy_stream) -SET(SNAPPYSTREAM_INCLUDE_DIR "${SNAPPYSTREAM_INSTALL_DIR}/include/" CACHE PATH "snappy stream include directory." FORCE) +set(SNAPPYSTREAM_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy_stream) +set(SNAPPYSTREAM_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy_stream) +set(SNAPPYSTREAM_INCLUDE_DIR "${SNAPPYSTREAM_INSTALL_DIR}/include" CACHE PATH "snappy stream include directory." FORCE) + +set(SNAPPYSTREAM_LIBRARIES "${SNAPPYSTREAM_INSTALL_DIR}/lib/libsnappystream.a") ExternalProject_Add( extern_snappystream @@ -51,8 +52,7 @@ ExternalProject_Add( ) add_library(snappystream STATIC IMPORTED GLOBAL) -set_property(TARGET snappystream PROPERTY IMPORTED_LOCATION - "${SNAPPYSTREAM_INSTALL_DIR}/lib/libsnappystream.a") +set_property(TARGET snappystream PROPERTY IMPORTED_LOCATION ${SNAPPYSTREAM_LIBRARIES}) include_directories(${SNAPPYSTREAM_INCLUDE_DIR}) # For snappysteam to include its own headers. include_directories(${THIRD_PARTY_PATH}/install) # For Paddle to include snappy stream headers. diff --git a/cmake/generic.cmake b/cmake/generic.cmake index c4c9f77df8d57fe162616d2250bd4dfe5b7754e7..1d3e2ade6d393c6e4c37eea0dc1064cdb18808a5 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -195,14 +195,7 @@ function(cc_library TARGET_NAME) list(REMOVE_ITEM cc_library_DEPS warpctc) add_dependencies(${TARGET_NAME} warpctc) endif() - if("${cc_library_DEPS}" MATCHES "ARCHIVE_START") - # Support linking flags: --whole-archive (Linux) / -force_load (MacOS). - # WARNING: Please don't use ARCHIVE_START&ARCHIVE_END if TARGET_NAME will be linked by other libraries. - target_circle_link_libraries(${TARGET_NAME} ${cc_library_DEPS}) - list(REMOVE_ITEM cc_library_DEPS ARCHIVE_START ARCHIVE_END) - else() - target_link_libraries(${TARGET_NAME} ${cc_library_DEPS}) - endif() + target_link_libraries(${TARGET_NAME} ${cc_library_DEPS}) add_dependencies(${TARGET_NAME} ${cc_library_DEPS}) endif() @@ -243,11 +236,7 @@ function(cc_test TARGET_NAME) set(multiValueArgs SRCS DEPS ARGS) cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_executable(${TARGET_NAME} ${cc_test_SRCS}) - # Support linking flags: --whole-archive (Linux) / -force_load (MacOS) - target_circle_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog) - if("${cc_test_DEPS}" MATCHES "ARCHIVE_START") - list(REMOVE_ITEM cc_test_DEPS ARCHIVE_START ARCHIVE_END) - endif() + target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog) add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog) add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME} ${cc_test_ARGS} diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake index 0323cd9698cba916d2aa04403be97c0a6a463830..cc758019827b9a5416a801e4da43d754d4492a73 100644 --- a/cmake/inference_lib.cmake +++ b/cmake/inference_lib.cmake @@ -1,7 +1,22 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + set_property(GLOBAL PROPERTY FLUID_MODULES "") # find all fluid modules is used for paddle fluid static library function(find_fluid_modules TARGET_NAME) get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE) + string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path}) string(FIND "${__target_path}" "fluid" pos) if(pos GREATER 1) get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES) @@ -77,6 +92,23 @@ elseif (WITH_MKLML) ) endif() +if(NOT MOBILE_INFERENCE AND NOT RPI) + set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/snappy") + copy(snappy_lib + SRCS ${SNAPPY_INCLUDE_DIR} ${SNAPPY_LIBRARIES} + DSTS ${dst_dir} ${dst_dir}/lib) + + set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/snappystream") + copy(snappystream_lib + SRCS ${SNAPPYSTREAM_INCLUDE_DIR} ${SNAPPYSTREAM_LIBRARIES} + DSTS ${dst_dir} ${dst_dir}/lib) + + set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/zlib") + copy(zlib_lib + SRCS ${ZLIB_INCLUDE_DIR} ${ZLIB_LIBRARIES} + DSTS ${dst_dir} ${dst_dir}/lib) +endif() + # paddle fluid module set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid") set(dst_dir "${CMAKE_INSTALL_PREFIX}/paddle/fluid") diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt index c44f8a8a8ecc1ba1f886fc41aec863b4ca3458a6..8b1ca5e16548334ed0c9a6d31b88e0805304579e 100644 --- a/paddle/CMakeLists.txt +++ b/paddle/CMakeLists.txt @@ -24,6 +24,6 @@ if(NOT WITH_FLUID_ONLY) endif() add_subdirectory(testing) -if(NOT MOBILE_INFERENCE AND NOT ANDROID AND NOT IOS) +if(NOT MOBILE_INFERENCE AND NOT RPI) add_subdirectory(fluid) endif() diff --git a/paddle/fluid/CMakeLists.txt b/paddle/fluid/CMakeLists.txt index d725763b01d5953985f8e090605f68a8419b5498..d274d96c29bdbf5973d568d783369c3975bdc436 100644 --- a/paddle/fluid/CMakeLists.txt +++ b/paddle/fluid/CMakeLists.txt @@ -3,6 +3,7 @@ add_subdirectory(platform) add_subdirectory(framework) add_subdirectory(operators) add_subdirectory(pybind) -add_subdirectory(inference) add_subdirectory(string) add_subdirectory(recordio) +# NOTE: please add subdirectory inference at last. +add_subdirectory(inference) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 3840bbe83b68dc2a49aa73feb57a80e9992cad5f..1f3ca24df16cf080d325fbdc0d613a828e384b2a 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -79,14 +79,12 @@ add_custom_command(TARGET framework_py_proto POST_BUILD COMMENT "Copy generated python proto into directory paddle/fluid/proto." WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) -cc_library(backward SRCS backward.cc DEPS net_op) -cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context fill_constant_op) cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor) cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog) cc_library(executor SRCS executor.cc DEPS op_registry device_context scope -framework_proto backward glog lod_rank_table feed_fetch_method) +framework_proto glog lod_rank_table feed_fetch_method) cc_library(parallel_executor SRCS parallel_executor.cc DEPS multi_devices_graph_builder threaded_ssa_graph_executor) diff --git a/paddle/fluid/framework/backward.cc b/paddle/fluid/framework/backward.cc deleted file mode 100644 index 1314af2b3dab281bd201e6a77bfbe87e0bd58ffb..0000000000000000000000000000000000000000 --- a/paddle/fluid/framework/backward.cc +++ /dev/null @@ -1,585 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/backward.h" -#include "paddle/fluid/operators/net_op.h" - -#include -#include -#include -#include - -#include "paddle/fluid/framework/block_desc.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/net_op.h" - -namespace paddle { -namespace framework { - -static std::unordered_set* g_ctrl_flow_ops_ = nullptr; -// Control Flow operators's backward is significantly different from -// computational operators. Hack Code here. -// We should design a better way to backward CtrlFlowOps. -static std::unordered_set& CtrlFlowOps() { - if (g_ctrl_flow_ops_ == nullptr) { - g_ctrl_flow_ops_ = new std::unordered_set{ - "increment", "lod_rank_table", "less_than"}; - } - return *g_ctrl_flow_ops_; -} - -static inline std::unique_ptr CreateGradOp( - const OperatorBase& op, const std::unordered_set& no_grad_set, - std::unordered_map* grad_to_var) { - OpDesc op_desc; - op_desc.SetInputMap(op.Inputs()); - op_desc.SetOutputMap(op.Outputs()); - op_desc.SetType(op.Type()); - op_desc.SetAttrMap(op.Attrs()); - auto& info = OpInfoMap::Instance().Get(op.Type()); - auto grad_descs = info.GradOpMaker()(op_desc, no_grad_set, grad_to_var, {}); - std::vector> grad_ops; - grad_ops.reserve(grad_descs.size()); - std::transform(grad_descs.begin(), grad_descs.end(), - std::back_inserter(grad_ops), - [](const std::unique_ptr& grad_desc) { - return OpRegistry::CreateOp(*grad_desc); - }); - PADDLE_ENFORCE(!grad_ops.empty()); - if (grad_ops.size() == 1) { - return std::move(grad_ops[0]); - } else { - auto net_op = new operators::NetOp(); - for (auto& grad_op : grad_ops) { - net_op->AppendOp(std::move(grad_op)); - } - net_op->CompleteAddOp(); - return std::unique_ptr(net_op); - } -} - -template -static void ForEachVarName(const Map& names, T callback) { - for (auto& name : names) { - for (auto& n : name.second) { - if (callback(n)) return; - } - } -} - -// return whether all the names + suffixes in the set -static bool AllInSet( - const std::map>& names, - const std::string& suffix, const std::unordered_set& set) { - bool all_in_set = true; - ForEachVarName(names, [&all_in_set, &set, &suffix](const std::string& n) { - all_in_set = set.find(n + suffix) != set.end(); - return !all_in_set; - }); - return all_in_set; -} - -static std::unique_ptr NOP() { - auto net_op = new operators::NetOp(); - net_op->SetType("@NOP@"); - net_op->CompleteAddOp(); - return std::unique_ptr(net_op); -} - -// Get backward operator from a forward operator, a recursive implementation. -// -// no_grad_names the gradient variable names without gradient calculating. -// -// uniq_id is a unique index used inside recursively calling -// BackwardRecursive. use `uid = uniq_id++;` to get the unique index, and -// pass `uniq_id` through recursive calling. -// -// returns The backward operator. In a simple situation, it may be a simple -// operator, in a complex situation, it maybe a NetOp. -// -// See Backward.h for details -static std::unique_ptr BackwardRecursive( - const OperatorBase& forwardOp, - std::unordered_set& no_grad_names, - std::unordered_map* grad_to_var, - size_t& uniq_id) { - // If all input gradients of forwarding operator do not need to calculate, - // just return an NOP. Not return null ptr because NOP does not take - // too much time for calculation, but it is useful for simplifying logic. - if (AllInSet(forwardOp.Inputs() /*names*/, kGradVarSuffix /*suffix*/, - no_grad_names /*set*/)) { - return NOP(); - } - - // All output gradients of forwarding operator do not need to calculate. - // Then all input gradients cannot be computed at all, and we put them into - // `no_grad_names` set. Return an NOP. - if (AllInSet(forwardOp.Outputs() /*names*/, kGradVarSuffix /*suffix*/, - no_grad_names /*set*/)) { - ForEachVarName(forwardOp.Inputs(), - [&no_grad_names](const std::string& name) -> bool { - no_grad_names.insert(GradVarName(name)); - return false; - }); - return NOP(); - } - - // Returned gradient network - auto net = std::unique_ptr(new operators::NetOp()); - - if (forwardOp.IsNetOp()) { - // Because forwardOp is a net op, it can static_cast. - auto& forwardNet = static_cast(forwardOp); - - // Map from output gradient variable name to operator's indices in - // backward net's ops_. That operator generates that variable. - std::unordered_map> dup_output_ops; - - size_t local_op_id = 0; - // reversely travel forwardNet and collect all duplicate outputs. - for (auto it = forwardNet.ops_.rbegin(); it != forwardNet.ops_.rend(); - ++it, ++local_op_id) { - auto& fwd = *it; - auto bwd = BackwardRecursive(*fwd, no_grad_names, grad_to_var, uniq_id); - ForEachVarName(bwd->Outputs(), - [&dup_output_ops, local_op_id](const std::string& out) { - dup_output_ops[out].emplace_back(local_op_id); - return false; - }); - net->AppendOp(std::move(bwd)); - } - // Get unique ID for this method. - auto uid = uniq_id++; - // TODO(dzh): more comment - // multiple operators which have the same output (y for example) may - // overwrite the same y variable when backward, special operations are token - // to handle this case. For each duplicate output, rename it to an alias - // (original name with a offset), append an `add` op for its operator, - // and finally sum all the alias variable to the final output variable y. - using Pos = std::pair>; - std::list insert_position; - for (auto& dup_output_op : dup_output_ops) { - const std::string& name = dup_output_op.first; - // duplicate @Empty@ don't need to be added - if (name == kEmptyVarName) continue; - - auto& dup_op = dup_output_op.second; - // no duplicate output - if (dup_op.size() == 1) continue; - - // process the duplicate outputs - std::vector dup_outputs; - for (size_t i = 0; i < dup_op.size(); ++i) { - // rename each duplicate output to an alias - auto op_offset = dup_op[i]; - dup_outputs.push_back(name + "@RENAME@" + std::to_string(uid) + "@" + - std::to_string(i)); - net->ops_[op_offset]->Rename(name, dup_outputs.back()); - } - // collect all the offset for each alias, - // insert a sum operator to add all aliases to output - insert_position.push_back( - {dup_op.back(), - OpRegistry::CreateOp("sum", {{"X", dup_outputs}}, {{"Out", {name}}}, - AttributeMap{})}); - } - - // make sure the inserted `sum` ops follow the BFS order. - insert_position.sort( - [](const Pos& l, const Pos& r) { return l.first > r.first; }); - - for (auto& pos : insert_position) { - net->InsertOp(pos.first + 1, std::move(pos.second)); - } - } else { - std::unique_ptr grad_op( - CreateGradOp(forwardOp, no_grad_names, grad_to_var)); - - ForEachVarName(grad_op->Inputs(), [&no_grad_names, &net, &grad_op]( - const std::string& grad_input) { - if (no_grad_names.count(grad_input)) { - // +1 for \0 - std::string prefix = grad_input.substr( - 0, grad_input.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1); - grad_op->Rename(grad_input, prefix + kZeroVarSuffix); - - // If part of input gradient of that operator is not calculated, fill - // zero variables to that input gradient. - net->AppendOp(OpRegistry::CreateOp("fill_zeros_like", {{"X", {prefix}}}, - {{"Out", {grad_input}}}, - AttributeMap{})); - } - return false; - }); - - ForEachVarName(grad_op->Outputs(), - [&no_grad_names, &grad_op](const std::string& grad_output) { - if (no_grad_names.count(grad_output)) { - grad_op->Rename(grad_output, kEmptyVarName); - } - return false; - }); - - if (net->ops_.empty()) { // Current no aux op is added to network - return grad_op; - } - net->AppendOp(std::move(grad_op)); - } - net->SetType("@GENERATED_BACKWARD@"); - net->CompleteAddOp(); - return std::unique_ptr( - static_cast(net.release())); -} - -// See header for comments -std::unique_ptr Backward( - const OperatorBase& forwardOp, - const std::unordered_set& no_grad_vars) { - std::unordered_set no_grad_names; - no_grad_names.reserve(no_grad_vars.size() + 1); - - no_grad_names.insert(std::string(kEmptyVarName) + kGradVarSuffix); - - for (auto& name : no_grad_vars) { - no_grad_names.insert(name + kGradVarSuffix); - } - size_t uid = 0; - std::unordered_map grad_to_var; - return BackwardRecursive(forwardOp, no_grad_names, &grad_to_var, uid); -} - -// ==================================== // - -static bool AllGradInSet(const std::vector& names, - const std::unordered_set& set) { - for (const std::string& name : names) { - if (!set.count(GradVarName(name))) { - return false; - } - } - if (VLOG_IS_ON(10)) { - std::ostringstream sout; - sout << "All input {"; - for (auto& name : names) { - sout << name << ","; - } - sout << "} is in {"; - for (auto& name : set) { - sout << name << ","; - } - sout << "}"; - VLOG(10) << sout.str(); - } - return true; -} - -static std::string FwdName(const std::string& grad_name) { - auto pos = grad_name.find("@GRAD"); - if (pos == std::string::npos) { - return ""; - } else { - return grad_name.substr(0, pos); - } -} - -static void CreateGradVarInBlock( - size_t grad_op_start_index, - const std::unordered_map& param_name_map, - BlockDesc* block_desc, - std::unordered_map* grad_var_record) { - auto ops = block_desc->AllOps(); - for (size_t op_index = grad_op_start_index; op_index < ops.size(); - ++op_index) { - std::unordered_set new_vars; - auto& ctrl_flow_ops = CtrlFlowOps(); - ForEachVarName(ops[op_index]->Outputs(), - [&](const std::string& grad_var_name) { - if (ctrl_flow_ops.find(ops[op_index]->Type()) != - ctrl_flow_ops.end()) { - if (block_desc->HasVarRecursive(grad_var_name)) { - return false; - } - } else { - if (block_desc->HasVar(grad_var_name)) { - return false; - } - } - if (grad_var_name == framework::kEmptyVarName) { - return false; - } - auto var = block_desc->Var(grad_var_name); - VLOG(10) << "Creating Variable " << grad_var_name; - new_vars.insert(var->Name()); - auto it = param_name_map.find(grad_var_name); - if (it == param_name_map.end()) { - return false; - } - auto param_var_name = it->second; - auto& grad_record = (*grad_var_record)[param_var_name]; - grad_record.name_ = grad_var_name; - grad_record.block_idx_ = block_desc->ID(); - grad_record.op_idx_ = static_cast(op_index); - return false; /* not break */ - }); - ops[op_index]->InferVarType(block_desc); - for (auto& arg : ops[op_index]->OutputArgumentNames()) { - if (new_vars.find(arg) == new_vars.end()) { - continue; - } - auto pname = FwdName(arg); - auto* param = block_desc->FindVarRecursive(pname); - auto* grad = block_desc->FindVar(arg); - if (param == nullptr) { - grad->SetDataType(proto::VarType::FP32); - } else { - grad->SetDataType(param->GetDataType()); - } - } - ops[op_index]->InferShape(*block_desc); - } -} - -std::vector> MakeOpGrad( - const OpDesc* op_desc, std::unordered_set* no_grad_vars, - std::unordered_map* grad_to_var, - const std::vector& grad_block = std::vector()) { - std::vector> grad_op_descs; - // All input gradients of forwarding operator do not need to calculate. - const std::vector& inputs = op_desc->InputArgumentNames(); - if (AllGradInSet(inputs, *no_grad_vars)) { - VLOG(10) << "Drop operator " << op_desc->Type(); - return grad_op_descs; // empty vector - } - - // All output gradients of forwarding operator do not need to calculate. - const std::vector& outputs = op_desc->OutputArgumentNames(); - - if (AllGradInSet(outputs, *no_grad_vars)) { - VLOG(10) << "Drop operator " << op_desc->Type(); - // FIXME: Hack code here - auto& ctrl_flow_ops = CtrlFlowOps(); - if (ctrl_flow_ops.find(op_desc->Type()) == ctrl_flow_ops.end()) { - // Only computational op need drop input's gradient. - for (const std::string& name : inputs) { - no_grad_vars->insert(GradVarName(name)); - VLOG(10) << " Also drop " << GradVarName(name); - } - } - - return grad_op_descs; // empty vector - } - - grad_op_descs = - OpInfoMap::Instance() - .Get(op_desc->Type()) - .GradOpMaker()(*op_desc, *no_grad_vars, grad_to_var, grad_block); - - std::list> pending_fill_zeros_ops; - for (auto& desc : grad_op_descs) { - for (const std::string& in_name : desc->InputArgumentNames()) { - if (no_grad_vars->count(in_name)) { - std::string prefix = in_name.substr( - 0, in_name.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1); - std::string new_name = prefix + kZeroVarSuffix; - desc->Rename(in_name, new_name); - std::unique_ptr fill_zeros_op( - new OpDesc("fill_zeros_like", {{"X", {prefix}}}, - {{"Out", {new_name}}}, AttributeMap{})); - pending_fill_zeros_ops.push_back(std::move(fill_zeros_op)); - } - } - } - - for (auto& p : pending_fill_zeros_ops) { - grad_op_descs.insert(grad_op_descs.begin(), std::move(p)); - } - return grad_op_descs; -} - -static BlockDesc* CreateStepBlock( - ProgramDesc& program_desc, std::unordered_set* no_grad_vars, - std::unordered_map* grad_to_var, - int step_block_idx); - -std::vector> MakeBlockBackward( - ProgramDesc& program_desc, int block_idx, - std::unordered_set* no_grad_vars, - std::unordered_map* grad_to_var) { - VLOG(5) << "MakeBlockBackward"; - BlockDesc* cur_block = program_desc.MutableBlock(block_idx); - std::vector op_descs = cur_block->AllOps(); - std::unordered_map> dup_out_ops; - size_t grad_desc_idx = 0; - std::vector> backward_descs; - - for (auto it = op_descs.rbegin(); it != op_descs.rend(); ++it) { - VLOG(5) << "Making backward " << (*it)->Type() << " op"; - std::vector> op_grads; - - if ((*it)->Type() == "recurrent" || (*it)->Type() == "while" || - (*it)->Type() == "parallel_do") { - int step_block_idx = (*it)->GetBlockAttr("sub_block"); - BlockDesc* backward_block = CreateStepBlock(program_desc, no_grad_vars, - grad_to_var, step_block_idx); - op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var, {backward_block}); - } else if ((*it)->Type() == "conditional_block") { - BlockDesc* backward_block = - CreateStepBlock(program_desc, no_grad_vars, grad_to_var, - (*it)->GetBlockAttr("sub_block")); - op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var, {backward_block}); - } else { - op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var); - } - - if (VLOG_IS_ON(10)) { - std::ostringstream sout; - sout << "Made "; - for (auto& op_grad : op_grads) { - sout << op_grad->Type() << " "; - } - VLOG(10) << sout.str(); - } - - for (const auto& desc : op_grads) { - for (const std::string& out_name : desc->OutputArgumentNames()) { - if (out_name.find("@GRAD") == std::string::npos) { - // Not all outputs of a backward operator is a gradient. Only gradient - // need to be sum. Skip variables are not gradient. - continue; - } - dup_out_ops[out_name].emplace_back(grad_desc_idx); - } - ++grad_desc_idx; - } - std::transform(op_grads.begin(), op_grads.end(), - std::back_inserter(backward_descs), - [](std::unique_ptr& ptr) { return std::move(ptr); }); - } - - VLOG(5) << "Appending Sums"; - // Check whether some variables are written more than once - std::list>> pending_sum_ops; - for (const auto& dup : dup_out_ops) { - const std::string& out_name = dup.first; - const std::vector dup_op = dup.second; - if (out_name != kEmptyVarName && dup_op.size() > 1) { - std::vector sum_op_inputs; - std::string next_g_name = out_name; - for (size_t i = 0; i < dup_op.size(); ++i) { - VLOG(10) << backward_descs[dup_op[i]]->Type() << " has " << out_name - << " duplicated"; - std::string new_name = out_name + "@RENAME@" + std::to_string(i); - backward_descs[dup_op[i]]->RenameOutput(out_name, new_name); - backward_descs[dup_op[i]]->RenameInput(out_name, next_g_name); - sum_op_inputs.emplace_back(new_name); - next_g_name = sum_op_inputs.back(); - } - std::unique_ptr sum_op(new OpDesc("sum", {{"X", sum_op_inputs}}, - {{"Out", {out_name}}}, - AttributeMap{})); - pending_sum_ops.push_back({dup_op.back(), std::move(sum_op)}); - } - } - - pending_sum_ops.sort([](const std::pair>& a, - const std::pair>& b) { - return a.first > b.first; - }); - for (auto& p : pending_sum_ops) { - backward_descs.insert(backward_descs.begin() + p.first + 1, - std::move(p.second)); - } - - VLOG(5) << "MakeBlockBackward Finished"; - - return backward_descs; -} - -static BlockDesc* CreateStepBlock( - ProgramDesc& program_desc, std::unordered_set* no_grad_vars, - std::unordered_map* grad_to_var, - int step_block_idx) { - auto backward_block_op_descs = MakeBlockBackward(program_desc, step_block_idx, - no_grad_vars, grad_to_var); - BlockDesc* backward_block = - program_desc.AppendBlock(*program_desc.MutableBlock(step_block_idx)); - for (auto& ptr : backward_block_op_descs) { - backward_block->AppendAllocatedOp(move(ptr)); - } - return backward_block; -} - -ParamGradInfoMap AppendBackward( - ProgramDesc& program_desc, const VarDesc& target, - const std::unordered_set& no_grad_vars) { - std::unordered_set no_grad_var_names; - no_grad_var_names.reserve(no_grad_vars.size() + 1); - no_grad_var_names.insert(std::string(kEmptyVarName) + kGradVarSuffix); - for (auto& name : no_grad_vars) { - no_grad_var_names.insert(GradVarName(name)); - } - - const int root_block_idx = 0; - auto root_block = program_desc.MutableBlock(root_block_idx); - - std::string fill_one_op_out = GradVarName(target.Name()); - bool is_scalar = target.GetShape() == std::vector{1}; - PADDLE_ENFORCE(is_scalar, "target should be scalar"); - VLOG(3) << "backward from loss=" << target.Name() - << " data_type=" << target.GetDataType(); - std::unique_ptr fill_one_op( - new OpDesc("fill_constant", {}, {{"Out", {fill_one_op_out}}}, - {{"shape", std::vector{1}}, - {"value", static_cast(1.0)}, - {"dtype", target.GetDataType()}})); - // infer var type of fill_one_op - fill_one_op->InferVarType(root_block); - - root_block->AppendAllocatedOp(std::move(fill_one_op)); - size_t forward_op_num = root_block->OpSize(); - size_t forward_block_num = program_desc.Size(); - - // Insert backward operators - std::unordered_map grad_to_var; - auto backward_op_descs = MakeBlockBackward(program_desc, root_block_idx, - &no_grad_var_names, &grad_to_var); - - for (auto& ptr : backward_op_descs) { - root_block->AppendAllocatedOp(std::move(ptr)); - } - // Create Variable - - // Create target gradient variable - std::unordered_map retv; - - auto var = root_block->Var(fill_one_op_out); - var->SetDataType(target.GetDataType()); - var->SetShape(target.GetShape()); - auto& target_grad = retv[target.Name()]; - target_grad.name_ = fill_one_op_out; - target_grad.block_idx_ = root_block_idx; - target_grad.op_idx_ = static_cast(forward_op_num); - - // create grad_var for all blocks in this program - CreateGradVarInBlock(forward_op_num, grad_to_var, root_block, &retv); - for (size_t block_index = forward_block_num; - block_index < program_desc.Size(); ++block_index) { - CreateGradVarInBlock(0, grad_to_var, program_desc.MutableBlock(block_index), - &retv); - } - return retv; -} - -} // namespace framework -} // namespace paddle diff --git a/paddle/fluid/framework/backward.h b/paddle/fluid/framework/backward.h deleted file mode 100644 index 3a971090c25c85efbf976532c364371baba9a870..0000000000000000000000000000000000000000 --- a/paddle/fluid/framework/backward.h +++ /dev/null @@ -1,56 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include - -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/program_desc.h" - -namespace paddle { -namespace framework { - -// Create the backward operator from a forward operator. -// TODO(yuyang18): Add more API reference comment. -extern std::unique_ptr Backward( - const OperatorBase& forwardOp, - const std::unordered_set& no_grad_vars); - -struct GradVarInfo { - GradVarInfo() {} - GradVarInfo(const std::string& name, int block_idx, int op_idx) - : name_(name), block_idx_(block_idx), op_idx_(op_idx) {} - - bool operator==(const GradVarInfo& b) const { - return name_ == b.name_ && block_idx_ == b.block_idx_ && - op_idx_ == b.op_idx_; - } - - std::string name_; - int block_idx_; - int op_idx_; -}; - -using ParamGradInfoMap = std::unordered_map; - -ParamGradInfoMap AppendBackward( - ProgramDesc& program_desc, const VarDesc& target, - const std::unordered_set& no_grad_vars); - -} // namespace framework -} // namespace paddle diff --git a/paddle/fluid/framework/backward_test.cc b/paddle/fluid/framework/backward_test.cc deleted file mode 100644 index cc1f871360ed3f7071364dbb0f932bfd997cadb0..0000000000000000000000000000000000000000 --- a/paddle/fluid/framework/backward_test.cc +++ /dev/null @@ -1,918 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/backward.h" - -#include -#include "paddle/fluid/framework/block_desc.h" -#include "paddle/fluid/framework/op_desc.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/var_desc.h" -#include "paddle/fluid/operators/net_op.h" - -USE_NO_KERNEL_OP(fill_constant); - -namespace paddle { -namespace framework { - -using DeviceContext = platform::DeviceContext; - -class NoneOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(framework::InferShapeContext *ctx) const override {} -}; - -template -class NoneKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &context) const override {} -}; - -class RowWiseAddOpMaker : public OpProtoAndCheckerMaker { - public: - RowWiseAddOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input X of Add"); - AddInput("b", "Bias of Add"); - AddOutput("Out", "Out of Add"); - AddComment("Add Op"); - } -}; - -class RowWiseAddGradMaker : public SingleGradOpDescMaker { - public: - using SingleGradOpDescMaker::SingleGradOpDescMaker; - - protected: - std::unique_ptr Apply() const override { - auto grad_op = new OpDesc(); - grad_op->SetInput(GradVarName("Out"), OutputGrad("Out")); - grad_op->SetOutput(GradVarName("X"), InputGrad("X")); - grad_op->SetOutput(GradVarName("b"), InputGrad("b")); - grad_op->SetType("rowwise_add_grad"); - return std::unique_ptr(grad_op); - } -}; - -class MulOpMaker : public OpProtoAndCheckerMaker { - public: - MulOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "A"); - AddInput("Y", "B"); - AddOutput("Out", "Out"); - AddAttr("x_num_col_dims", "").SetDefault(1).EqualGreaterThan(1); - AddAttr("y_num_col_dims", "").SetDefault(1).EqualGreaterThan(1); - AddComment("Mul"); - } -}; - -class SigmoidOpMaker : public OpProtoAndCheckerMaker { - public: - SigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "X"); - AddOutput("Out", "Y"); - AddComment("Sigmoid"); - } -}; - -class NoGradOpMaker : public OpProtoAndCheckerMaker { - public: - NoGradOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "X input"); - AddOutput("Out", "Y output"); - AddComment("NoGradOp, same input output. no Grad"); - } -}; - -class FcOp : public operators::NetOp { - public: - FcOp(const std::string &type, const VariableNameMap &inputs, - const VariableNameMap &outputs, const AttributeMap &attrs) - : NetOp(type, inputs, outputs, attrs) { - AppendOp(OpRegistry::CreateOp( - "mul", {{"X", {Input("X")}}, {"Y", {Input("W")}}}, - {{"Out", {Output("mul_result")}}}, AttributeMap{})); - auto input_b = Inputs("b"); - std::string before_act = "mul_result"; - if (input_b.size() != 0) { - AppendOp(OpRegistry::CreateOp( - "rowwise_add", {{"X", {Output("mul_result")}}, {"b", {input_b[0]}}}, - {{"Out", {Output("add_result")}}}, AttributeMap{})); - before_act = "add_result"; - } else { - auto out_varname = Output("add_result"); - if (out_varname != kEmptyVarName) { - this->Rename(out_varname, kEmptyVarName); - } - } - - AppendOp(OpRegistry::CreateOp("sigmoid", {{"X", {Output(before_act)}}}, - {{"Out", {Output("Out")}}}, AttributeMap{})); - CompleteAddOp(false); - } -}; - -class FcOpMaker : public OpProtoAndCheckerMaker { - public: - FcOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "x"); - AddInput("W", "w"); - AddInput("b", "b"); - AddOutput("mul_result", "").AsIntermediate(); - AddOutput("add_result", "").AsIntermediate(); - AddOutput("Out", ""); - AddComment(""); - } -}; - -class ManyOutputOpMaker : public OpProtoAndCheckerMaker { - public: - ManyOutputOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("x", "x"); - AddOutput("y", "y"); - AddOutput("z", "z"); - AddComment(""); - } -}; - -class FillZeroOpMaker : public OpProtoAndCheckerMaker { - public: - FillZeroOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "x"); - AddOutput("Out", "out"); - AddComment(""); - } -}; - -class SumOpMaker : public framework::OpProtoAndCheckerMaker { - public: - SumOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "the input tensors of sum operator.").AsDuplicable(); - AddOutput("Out", "the output tensor of sum operator."); - AddComment(""); - } -}; - -class MultInOutOpMaker : public OpProtoAndCheckerMaker { - public: - MultInOutOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "x"); - AddInput("H", "h"); - AddOutput("Y", "y"); - AddOutput("Z", "z"); - AddComment(""); - } -}; - -class MinusGradOpDescMaker : public GradOpDescMakerBase { - public: - using GradOpDescMakerBase::GradOpDescMakerBase; - - std::vector> operator()() const override { - std::vector> retv; - auto x_g = InputGrad("X"); - if (!x_g.empty()) { - auto *op_desc = new OpDesc(); - op_desc->SetType("scale"); - op_desc->SetInput("X", OutputGrad("Out")); - op_desc->SetOutput("Out", x_g); - op_desc->SetAttr("scale", 1.0f); - retv.emplace_back(op_desc); - } - - auto y_g = InputGrad("Y"); - if (!y_g.empty()) { - auto *op_desc = new OpDesc(); - op_desc->SetType("scale"); - op_desc->SetInput("X", OutputGrad("Out")); - op_desc->SetOutput("Out", y_g); - op_desc->SetAttr("scale", -1.0f); - retv.emplace_back(op_desc); - } - return retv; - } -}; - -class MinusOpMaker : public OpProtoAndCheckerMaker { - public: - MinusOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", ""); - AddInput("Y", ""); - AddOutput("Out", ""); - AddComment("minus for unittest"); - } -}; -} // namespace framework -} // namespace paddle - -namespace f = paddle::framework; -namespace ops = paddle::operators; -using EnforceNotMet = paddle::platform::EnforceNotMet; -// rowwise_add -REGISTER_OPERATOR(rowwise_add, f::NoneOp, f::RowWiseAddOpMaker, - f::RowWiseAddGradMaker); -REGISTER_OP_CPU_KERNEL(rowwise_add, - f::NoneKernel); -REGISTER_OPERATOR(rowwise_add_grad, f::NoneOp); -REGISTER_OP_CPU_KERNEL(rowwise_add_grad, - f::NoneKernel); -// mul -REGISTER_OP(mul, f::NoneOp, f::MulOpMaker, mul_grad, f::NoneOp); -REGISTER_OP_CPU_KERNEL(mul, f::NoneKernel); -REGISTER_OP_CPU_KERNEL(mul_grad, - f::NoneKernel); -// sigmoid -REGISTER_OP(sigmoid, f::NoneOp, f::SigmoidOpMaker, sigmoid_grad, f::NoneOp); -REGISTER_OP_CPU_KERNEL(sigmoid, - f::NoneKernel); -REGISTER_OP_WITHOUT_GRADIENT(nograd, f::NoneOp, f::NoGradOpMaker); -// fill_zeros_like -REGISTER_OP_WITHOUT_GRADIENT(fill_zeros_like, f::NoneOp, f::FillZeroOpMaker); -REGISTER_OP_CPU_KERNEL(fill_zeros_like, - f::NoneKernel); -// sum -REGISTER_OP(sum, f::NoneOp, f::SumOpMaker, sum_grad, f::NoneOp); -REGISTER_OP_CPU_KERNEL(sum, f::NoneKernel); -REGISTER_OP_CPU_KERNEL(sum_grad, - f::NoneKernel); -// fc -REGISTER_OP_WITHOUT_GRADIENT(fc, f::FcOp, f::FcOpMaker); -// many_output_op -REGISTER_OP(many_output_op, f::NoneOp, f::ManyOutputOpMaker, - many_output_op_grad, f::NoneOp); -// mult_in_out -REGISTER_OP(mult_in_out, f::NoneOp, f::MultInOutOpMaker, mult_in_out_grad, - f::NoneOp); -REGISTER_OP_CPU_KERNEL(mult_in_out, - f::NoneKernel); -REGISTER_OP_CPU_KERNEL(mult_in_out_grad, - f::NoneKernel); -// minus -REGISTER_OPERATOR(minus, f::NoneOp, f::MinusOpMaker, f::MinusGradOpDescMaker); -REGISTER_OP_CPU_KERNEL(minus, f::NoneKernel); -// scale -REGISTER_OPERATOR(scale, f::NoneOp); -REGISTER_OP_CPU_KERNEL(scale, f::NoneKernel); - -TEST(Backward, simple_op_not_need_grad) { - auto fwd = - f::OpRegistry::CreateOp("rowwise_add", {{"X", {"x"}}, {"b", {"b"}}}, - {{"Out", {"out"}}}, f::AttributeMap{}); - ASSERT_NE(fwd, nullptr); - auto gop = f::Backward(*fwd, {"x"}); - ASSERT_EQ(gop->Output(f::GradVarName("X")), f::kEmptyVarName); - - auto no_input_gop = f::Backward(*fwd, {"x", "b"}); - ASSERT_NE(no_input_gop, nullptr); - ASSERT_TRUE(no_input_gop->IsNetOp()); - ASSERT_EQ(0UL, static_cast(no_input_gop.get())->ops_.size()); -} - -TEST(Backward, net_fc_backward_normal) { - std::shared_ptr fwd = - f::OpRegistry::CreateOp("fc", {{"X", {"x"}}, {"W", {"w"}}, {"b", {"b"}}}, - {{"mul_result", {"mul_res"}}, - {"add_result", {"add_re"}}, - {"Out", {"out"}}}, - f::AttributeMap{}); - ASSERT_NE(fwd, nullptr); - std::shared_ptr gop = - f::Backward(*fwd, std::unordered_set{}); - ASSERT_TRUE(gop->IsNetOp()); - auto net = static_cast(gop.get()); - - ASSERT_NO_THROW(net->DebugString()); - - ASSERT_EQ(3UL, net->ops_.size()); - - f::OperatorBase &d_sigmoid = *net->ops_[0]; - ASSERT_EQ("sigmoid_grad", d_sigmoid.Type()); - - f::OperatorBase &d_add = *net->ops_[1]; - ASSERT_EQ("rowwise_add_grad", d_add.Type()); - - f::OperatorBase &d_mul = *net->ops_[2]; - ASSERT_EQ("mul_grad", d_mul.Type()); -} - -TEST(Backward, net_fc_backward_not_have_b) { - std::shared_ptr fwd = - f::OpRegistry::CreateOp("fc", {{"X", {"x"}}, {"W", {"w"}}, {"b", {}}}, - {{"mul_result", {"mul_res"}}, - {"add_result", {"add_res"}}, - {"Out", {"tmp"}}}, - f::AttributeMap{}); - ASSERT_NE(fwd, nullptr); - std::shared_ptr gop = - f::Backward(*fwd, std::unordered_set{}); - ASSERT_TRUE(gop->IsNetOp()); - auto net = static_cast(gop.get()); - - ASSERT_NO_THROW(net->DebugString()); - - ASSERT_EQ(2UL, net->ops_.size()); - - f::OperatorBase &d_sigmoid = *net->ops_[0]; - ASSERT_EQ("sigmoid_grad", d_sigmoid.Type()); - - f::OperatorBase &d_mul = *net->ops_[1]; - ASSERT_EQ("mul_grad", d_mul.Type()); -} - -TEST(Backward, net_input_of_network_not_need_grad) { - ops::NetOp net; - net.AppendOp(f::OpRegistry::CreateOp( - "fc", {{"X", {"x"}}, {"W", {"W1"}}, {"b", {"b1"}}}, - {{"mul_result", {"mul_tmp_0"}}, - {"add_result", {"add_tmp_0"}}, - {"Out", {"hidden0"}}}, - f::AttributeMap{})); - net.AppendOp(f::OpRegistry::CreateOp( - "fc", {{"X", {"hidden0"}}, {"W", {"W2"}}, {"b", {"b2"}}}, - {{"mul_result", {"mul_tmp_1"}}, - {"add_result", {"add_tmp_1"}}, - {"Out", {"hidden1"}}}, - f::AttributeMap{})); - net.CompleteAddOp(); - auto bwd = Backward(net, {"x"}); // x@GRAD is not need. - ASSERT_TRUE(bwd->IsNetOp()); - auto bwd_net = static_cast(bwd.get()); - - auto output_vars = bwd_net->OutputVars(true); - std::unordered_set all_outputs = - std::unordered_set(output_vars.begin(), output_vars.end()); - all_outputs.erase(f::kEmptyVarName); - - for (auto &out : {"W1", "b1", "hidden0", "W2", "b2"}) { - ASSERT_NE(all_outputs.find(f::GradVarName(out)), all_outputs.end()); - } - - // Not Generated X - ASSERT_EQ(all_outputs.find(f::GradVarName("X")), all_outputs.end()); - - ASSERT_EQ(2UL, bwd_net->ops_.size()); - ASSERT_TRUE(bwd_net->ops_[1]->IsNetOp()); - auto first_fc_grad = static_cast(bwd_net->ops_[1].get()); - ASSERT_EQ(3UL, first_fc_grad->ops_.size()); - ASSERT_EQ(f::kEmptyVarName, - first_fc_grad->ops_[2]->Output(f::GradVarName("X"))); -} - -TEST(Backward, net_shared_weight) { - ops::NetOp net; - net.AppendOp(f::OpRegistry::CreateOp("mul", {{"X", {"x"}}, {"Y", {"w"}}}, - {{"Out", {"out"}}}, f::AttributeMap{})); - net.AppendOp(f::OpRegistry::CreateOp("mul", {{"X", {"out"}}, {"Y", {"w"}}}, - {{"Out", {"FinalOut"}}}, - f::AttributeMap{})); - net.CompleteAddOp(); - - auto bwd = f::Backward(net, std::unordered_set{}); - ASSERT_TRUE(bwd->IsNetOp()); - auto bwd_net = static_cast(bwd.get()); - ASSERT_EQ(3UL, bwd_net->ops_.size()); - ASSERT_EQ("sum", bwd_net->ops_[2]->Type()); -} - -TEST(Backward, op_all_input_are_not_need) { - auto fwd = - f::OpRegistry::CreateOp("rowwise_add", {{"X", {"x"}}, {"b", {"b"}}}, - {{"Out", {"out"}}}, f::AttributeMap{}); - auto backward = f::Backward(*fwd, {"x", "b"}); - ASSERT_TRUE(backward->IsNetOp()); - auto net = static_cast(backward.get()); - ASSERT_TRUE(net->ops_.empty()); -} - -TEST(Backward, op_all_output_are_not_need) { - auto fwd = - f::OpRegistry::CreateOp("rowwise_add", {{"X", {"x"}}, {"b", {"b"}}}, - {{"Out", {"out"}}}, f::AttributeMap{}); - auto backward = f::Backward(*fwd, {"out"}); - ASSERT_TRUE(backward->IsNetOp()); - auto net = static_cast(backward.get()); - ASSERT_TRUE(net->ops_.empty()); -} - -TEST(Backward, op_part_of_output_are_not_need) { - auto fwd = - f::OpRegistry::CreateOp("many_output_op", {{"x", {"X"}}}, - {{"y", {"Y"}}, {"z", {"Z"}}}, f::AttributeMap{}); - auto backward = f::Backward(*fwd, {"Z"}); - ASSERT_TRUE(backward->IsNetOp()); - auto net = static_cast(backward.get()); - ASSERT_EQ(net->ops_.size(), 2UL); - - auto &fill_zero = *net->ops_[0]; - ASSERT_EQ("fill_zeros_like", fill_zero.Type()); - ASSERT_EQ(1UL, fill_zero.Inputs("X").size()); - ASSERT_EQ("Z", fill_zero.Input("X")); - ASSERT_EQ(1UL, fill_zero.Outputs("Out").size()); - ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, fill_zero.Output("Out")); - - auto &d_many_out = *net->ops_[1]; - ASSERT_EQ("many_output_op_grad", d_many_out.Type()); - ASSERT_EQ(1UL + 2UL + 2UL, d_many_out.Inputs().size()); // I/O/OG - ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, - d_many_out.Input(f::GradVarName("z"))); - ASSERT_EQ(f::GradVarName("Y"), d_many_out.Input(f::GradVarName("y"))); - ASSERT_EQ(f::GradVarName("X"), d_many_out.Output(f::GradVarName("x"))); -} - -TEST(Backward, op_part_of_input_are_not_need) { - auto fwd = f::OpRegistry::CreateOp("mul", {{"X", {"a"}}, {"Y", {"b"}}}, - {{"Out", {"out"}}}, f::AttributeMap{}); - auto backward = f::Backward(*fwd, {"a"}); - auto &grad_mul = *backward; - ASSERT_EQ(grad_mul.Type(), "mul_grad"); - ASSERT_EQ(grad_mul.Inputs().size(), 2UL + 1UL + 1UL); - ASSERT_EQ(grad_mul.Outputs().size(), 2UL); - ASSERT_EQ(grad_mul.Output(f::GradVarName("X")), f::kEmptyVarName); - ASSERT_EQ(grad_mul.Output(f::GradVarName("Y")), f::GradVarName("b")); - ASSERT_EQ(grad_mul.Input(f::GradVarName("Out")), f::GradVarName("out")); - ASSERT_EQ(grad_mul.Input("X"), "a"); - ASSERT_EQ(grad_mul.Input("Y"), "b"); - ASSERT_EQ(grad_mul.Input("Out"), "out"); -} - -TEST(Backward, linear_net_intermediate_variable_has_no_grad) { - ops::NetOp net; - net.AppendOp(f::OpRegistry::CreateOp( - "fc", {{"X", {"x1"}}, {"W", {"w1"}}, {"b", {"b1"}}}, - {{"mul_result", {"mul_out1"}}, - {"add_result", {"add_out1"}}, - {"Out", {"out1"}}}, - f::AttributeMap{})); - net.AppendOp(f::OpRegistry::CreateOp( - "fc", {{"X", {"out1"}}, {"W", {"w2"}}, {"b", {"b2"}}}, - {{"mul_result", {"mul_out2"}}, - {"add_result", {"tmp_out2"}}, - {"Out", {"out2"}}}, - f::AttributeMap{})); - net.AppendOp(f::OpRegistry::CreateOp( - "fc", {{"X", {"out2"}}, {"W", {"w3"}}, {"b", {"b3"}}}, - {{"mul_result", {"mul_out3"}}, - {"add_result", {"tmp_out3"}}, - {"Out", {"out3"}}}, - f::AttributeMap{})); - net.CompleteAddOp(); - - auto backward = f::Backward(net, {"mul_out2", "tmp_out2", "out2"}); - ASSERT_TRUE(backward->IsNetOp()); - auto bwd_net = static_cast(backward.get()); - ASSERT_EQ(bwd_net->ops_.size(), 3UL); - auto &grad_fc = *bwd_net->ops_[0]; - - const char *all = paddle::operators::NetOp::kAll; - EXPECT_EQ(grad_fc.Inputs(all).size(), - 2UL /* external input number */ - + 1UL /* external output number*/ - + 1UL /* number of gradient of external output*/ - + 2UL /* internal variable number*/ - ); - EXPECT_EQ(grad_fc.Outputs(all).size(), - 2UL /* input number of mul*/ - + 2UL /* input number of rowwise_add*/ - + 1UL /* input number of sigmod */ - - 1UL /* out2 is not needed*/); - EXPECT_EQ(bwd_net->ops_[1]->Inputs(all).size(), 0UL); - EXPECT_EQ(bwd_net->ops_[1]->Outputs(all).size(), 0UL); - EXPECT_EQ(bwd_net->ops_[2]->Inputs(all).size(), 0UL); - EXPECT_EQ(bwd_net->ops_[2]->Outputs(all).size(), 0UL); -} - -TEST(Backward, simple_single_op) { - f::ProgramDesc program; - f::BlockDesc *block = program.MutableBlock(0); - - f::OpDesc *op = block->AppendOp(); - op->SetType("rowwise_add"); - op->SetInput("X", {"x"}); - op->SetInput("b", {"b"}); - op->SetOutput("Out", {"out"}); - - auto target = f::VarDesc("out"); - target.SetShape({1}); - auto var_to_grad = - AppendBackward(program, target, std::unordered_set{}); - - ASSERT_EQ(block->AllOps().size(), 3UL); - f::OpDesc *fill_op = block->AllOps()[1]; - EXPECT_EQ(fill_op->Type(), "fill_constant"); - - f::OpDesc *grad_op = block->AllOps()[2]; - EXPECT_EQ(grad_op->Type(), "rowwise_add_grad"); - ASSERT_EQ(grad_op->InputNames().size(), 1UL); - ASSERT_EQ(grad_op->OutputNames().size(), 2UL); - EXPECT_EQ(grad_op->Input(f::GradVarName("Out")), - std::vector({f::GradVarName("out")})); - EXPECT_EQ(grad_op->Output(f::GradVarName("X")), - std::vector({f::GradVarName("x")})); - EXPECT_EQ(grad_op->Output(f::GradVarName("b")), - std::vector({f::GradVarName("b")})); - - EXPECT_EQ(var_to_grad.size(), 3UL); - EXPECT_EQ(var_to_grad.at("b"), f::GradVarInfo(f::GradVarName("b"), 0, 2)); - EXPECT_EQ(var_to_grad.at("x"), f::GradVarInfo(f::GradVarName("x"), 0, 2)); - - EXPECT_TRUE(block->HasVar(f::GradVarName("b"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("x"))); -} - -TEST(Backward, default_attribute) { - f::ProgramDesc program; - f::BlockDesc *block = program.MutableBlock(0); - f::OpDesc *op = block->AppendOp(); - op->SetType("mul"); - op->SetInput("X", {"x"}); - op->SetInput("Y", {"y"}); - op->SetOutput("Out", {"out"}); - op->CheckAttrs(); - - auto target = f::VarDesc("out"); - target.SetShape({1}); - AppendBackward(program, target, std::unordered_set{}); - - ASSERT_EQ(block->AllOps().size(), 3UL); - EXPECT_EQ(boost::get(op->GetAttr("x_num_col_dims")), 1); - EXPECT_EQ(boost::get(op->GetAttr("y_num_col_dims")), 1); - - f::OpDesc *fill_op = block->AllOps()[1]; - EXPECT_EQ(fill_op->Type(), "fill_constant"); - - f::OpDesc *grad_op = block->AllOps()[2]; - ASSERT_EQ(grad_op->Type(), "mul_grad"); - EXPECT_EQ(boost::get(grad_op->GetAttr("x_num_col_dims")), 1); - EXPECT_EQ(boost::get(grad_op->GetAttr("y_num_col_dims")), 1); -} - -TEST(Backward, simple_mult_op) { - f::ProgramDesc program; - f::BlockDesc *block = program.MutableBlock(0); - f::OpDesc *op1 = block->AppendOp(); - op1->SetType("rowwise_add"); - op1->SetInput("X", {"x1"}); - op1->SetInput("b", {"b1"}); - op1->SetOutput("Out", {"out1"}); - - f::OpDesc *op2 = block->AppendOp(); - op2->SetType("mul"); - op2->SetInput("X", {"out1"}); - op2->SetInput("Y", {"y2"}); - op2->SetOutput("Out", {"out2"}); - - f::OpDesc *op3 = block->AppendOp(); - op3->SetType("rowwise_add"); - op3->SetInput("X", {"out2"}); - op3->SetInput("b", {"b3"}); - op3->SetOutput("Out", {"out3"}); - - auto target = f::VarDesc("out3"); - target.SetShape({1}); - size_t forward_len = block->AllOps().size(); - auto var_to_grad = - AppendBackward(program, target, std::unordered_set{}); - - ASSERT_EQ(block->AllOps().size(), 6UL + 1); - f::OpDesc *fill_op = block->AllOps()[forward_len]; - EXPECT_EQ(fill_op->Type(), "fill_constant"); - - f::OpDesc *grad_op1 = block->AllOps()[6]; - EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad"); - ASSERT_EQ(grad_op1->InputNames().size(), 1UL); - ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); - EXPECT_EQ(grad_op1->Input(f::GradVarName("Out")), - std::vector({f::GradVarName("out1")})); - EXPECT_EQ(grad_op1->Output(f::GradVarName("X")), - std::vector({f::GradVarName("x1")})); - EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), - std::vector({f::GradVarName("b1")})); - - f::OpDesc *grad_op2 = block->AllOps()[5]; - EXPECT_EQ(grad_op2->Type(), "mul_grad"); - ASSERT_EQ(grad_op2->InputNames().size(), 4UL); - ASSERT_EQ(grad_op2->OutputNames().size(), 2UL); - EXPECT_EQ(grad_op2->Input("X"), std::vector({"out1"})); - EXPECT_EQ(grad_op2->Input("Y"), std::vector({"y2"})); - EXPECT_EQ(grad_op2->Input("Out"), std::vector({"out2"})); - EXPECT_EQ(grad_op2->Input(f::GradVarName("Out")), - std::vector({f::GradVarName("out2")})); - EXPECT_EQ(grad_op2->Output(f::GradVarName("X")), - std::vector({f::GradVarName("out1")})); - EXPECT_EQ(grad_op2->Output(f::GradVarName("Y")), - std::vector({f::GradVarName("y2")})); - - f::OpDesc *grad_op3 = block->AllOps()[4]; - EXPECT_EQ(grad_op3->Type(), "rowwise_add_grad"); - ASSERT_EQ(grad_op3->InputNames().size(), 1UL); - ASSERT_EQ(grad_op3->OutputNames().size(), 2UL); - EXPECT_EQ(grad_op3->Input(f::GradVarName("Out")), - std::vector({f::GradVarName("out3")})); - EXPECT_EQ(grad_op3->Output(f::GradVarName("X")), - std::vector({f::GradVarName("out2")})); - EXPECT_EQ(grad_op3->Output(f::GradVarName("b")), - std::vector({f::GradVarName("b3")})); - - EXPECT_EQ(var_to_grad.size(), 7UL); - EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6)); - EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6)); - EXPECT_EQ(var_to_grad.at("out1"), - f::GradVarInfo(f::GradVarName("out1"), 0, 5)); - EXPECT_EQ(var_to_grad.at("y2"), f::GradVarInfo(f::GradVarName("y2"), 0, 5)); - EXPECT_EQ(var_to_grad.at("out2"), - f::GradVarInfo(f::GradVarName("out2"), 0, 4)); - EXPECT_EQ(var_to_grad.at("b3"), f::GradVarInfo(f::GradVarName("b3"), 0, 4)); - - EXPECT_TRUE(block->HasVar(f::GradVarName("x1"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("b1"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("out1"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("y2"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("out2"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("b3"))); -} - -TEST(Backward, intermedia_var_no_grad) { - f::ProgramDesc program; - f::BlockDesc *block = program.MutableBlock(0); - f::OpDesc *op1 = block->AppendOp(); - op1->SetType("rowwise_add"); - op1->SetInput("X", {"x1"}); - op1->SetInput("b", {"b1"}); - op1->SetOutput("Out", {"out1"}); - - f::OpDesc *op2 = block->AppendOp(); - op2->SetType("mul"); - op2->SetInput("X", {"x2"}); - op2->SetInput("Y", {"y2"}); - op2->SetOutput("Out", {"out2"}); - - f::OpDesc *op3 = block->AppendOp(); - op3->SetType("rowwise_add"); - op3->SetInput("X", {"out2"}); - op3->SetInput("b", {"b3"}); - op3->SetOutput("Out", {"out3"}); - - f::OpDesc *op4 = block->AppendOp(); - op4->SetType("mul"); - op4->SetInput("X", {"out1"}); - op4->SetInput("Y", {"out3"}); - op4->SetOutput("Out", {"out4"}); - - auto target = f::VarDesc("out4"); - target.SetShape({1}); - size_t forward_len = block->AllOps().size(); - auto var_to_grad = AppendBackward(program, target, {"out3"}); - - ASSERT_EQ(block->AllOps().size(), 7UL); - f::OpDesc *fill_op = block->AllOps()[forward_len]; - EXPECT_EQ(fill_op->Type(), "fill_constant"); - - f::OpDesc *grad_op1 = block->AllOps()[6]; - EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad"); - ASSERT_EQ(grad_op1->InputNames().size(), 1UL); - ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); - EXPECT_EQ(grad_op1->Input(f::GradVarName("Out")), - std::vector({f::GradVarName("out1")})); - EXPECT_EQ(grad_op1->Output(f::GradVarName("X")), - std::vector({f::GradVarName("x1")})); - EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), - std::vector({f::GradVarName("b1")})); - - f::OpDesc *grad_op4 = block->AllOps()[5]; - EXPECT_EQ(grad_op4->Type(), "mul_grad"); - ASSERT_EQ(grad_op4->InputNames().size(), 4UL); - ASSERT_EQ(grad_op4->OutputNames().size(), 2UL); - EXPECT_EQ(grad_op4->Input("X"), std::vector({"out1"})); - EXPECT_EQ(grad_op4->Input("Y"), std::vector({"out3"})); - EXPECT_EQ(grad_op4->Input("Out"), std::vector({"out4"})); - EXPECT_EQ(grad_op4->Input(f::GradVarName("Out")), - std::vector({f::GradVarName("out4")})); - EXPECT_EQ(grad_op4->Output(f::GradVarName("X")), - std::vector({f::GradVarName("out1")})); - EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), std::vector()); - - EXPECT_EQ(var_to_grad.size(), 4UL); - EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6)); - EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6)); - EXPECT_EQ(var_to_grad.at("out1"), - f::GradVarInfo(f::GradVarName("out1"), 0, 5)); - - EXPECT_TRUE(block->HasVar(f::GradVarName("x1"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("b1"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("out1"))); -} - -TEST(Backward, var_no_grad) { - f::ProgramDesc program; - f::BlockDesc *block = program.MutableBlock(0); - f::OpDesc *op1 = block->AppendOp(); - op1->SetType("mult_in_out"); - op1->SetInput("X", {"x1"}); - op1->SetInput("H", {"h1"}); - op1->SetOutput("Y", {"y1"}); - op1->SetOutput("Z", {"z1"}); - - f::OpDesc *op2 = block->AppendOp(); - op2->SetType("mult_in_out"); - op2->SetInput("X", {"y1"}); - op2->SetInput("H", {"z1"}); - op2->SetOutput("Y", {"y2"}); - op2->SetOutput("Z", {"z2"}); - - auto target = f::VarDesc("z2"); - target.SetShape({1}); - size_t forward_len = block->AllOps().size(); - auto var_to_grad = AppendBackward(program, target, {"z1"}); - - ASSERT_EQ(block->AllOps().size(), 6UL); - f::OpDesc *fill_op = block->AllOps()[forward_len]; - EXPECT_EQ(fill_op->Type(), "fill_constant"); - - f::OpDesc *grad_op2 = block->AllOps()[3]; - ASSERT_EQ(grad_op2->Type(), "mult_in_out_grad"); - ASSERT_EQ(grad_op2->InputNames().size(), 6UL); - ASSERT_EQ(grad_op2->OutputNames().size(), 2UL); - EXPECT_EQ(grad_op2->Input("X"), std::vector({"y1"})); - EXPECT_EQ(grad_op2->Input("H"), std::vector({"z1"})); - EXPECT_EQ(grad_op2->Input("Y"), std::vector({"y2"})); - EXPECT_EQ(grad_op2->Input("Z"), std::vector({"z2"})); - EXPECT_EQ(grad_op2->Input(f::GradVarName("Y")), - std::vector({f::GradVarName("y2")})); - EXPECT_EQ(grad_op2->Input(f::GradVarName("Z")), - std::vector({f::GradVarName("z2")})); - EXPECT_EQ(grad_op2->Output(f::GradVarName("X")), - std::vector({f::GradVarName("y1")})); - EXPECT_EQ(grad_op2->Output(f::GradVarName("H")), std::vector()); - - f::OpDesc *fill_zero_op = block->AllOps()[4]; - ASSERT_EQ(fill_zero_op->Type(), "fill_zeros_like"); - ASSERT_EQ(fill_zero_op->InputNames().size(), 1UL); - ASSERT_EQ(fill_zero_op->OutputNames().size(), 1UL); - EXPECT_EQ(fill_zero_op->Input("X"), std::vector({"z1"})); - EXPECT_EQ(fill_zero_op->Output("Out"), - std::vector({std::string("z1") + f::kZeroVarSuffix})); - - f::OpDesc *grad_op1 = block->AllOps()[5]; - ASSERT_EQ(grad_op1->Type(), "mult_in_out_grad"); - ASSERT_EQ(grad_op1->InputNames().size(), 6UL); - ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); - EXPECT_EQ(grad_op1->Input("X"), std::vector({"x1"})); - EXPECT_EQ(grad_op1->Input("H"), std::vector({"h1"})); - EXPECT_EQ(grad_op1->Input("Y"), std::vector({"y1"})); - EXPECT_EQ(grad_op1->Input("Z"), std::vector({"z1"})); - EXPECT_EQ(grad_op1->Input(f::GradVarName("Y")), - std::vector({f::GradVarName("y1")})); - EXPECT_EQ(grad_op1->Input(f::GradVarName("Z")), - std::vector({std::string("z1") + f::kZeroVarSuffix})); - EXPECT_EQ(grad_op1->Output(f::GradVarName("X")), - std::vector({f::GradVarName("x1")})); - EXPECT_EQ(grad_op1->Output(f::GradVarName("H")), - std::vector({f::GradVarName("h1")})); - - EXPECT_EQ(var_to_grad.size(), 4UL); - EXPECT_EQ(var_to_grad.at("y1"), f::GradVarInfo(f::GradVarName("y1"), 0, 3)); - EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 5)); - EXPECT_EQ(var_to_grad.at("h1"), f::GradVarInfo(f::GradVarName("h1"), 0, 5)); - - EXPECT_TRUE(block->HasVar(f::GradVarName("y1"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("x1"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("h1"))); -} - -TEST(Backward, shared_var) { - f::ProgramDesc program; - f::BlockDesc *block = program.MutableBlock(0); - f::OpDesc *op1 = block->AppendOp(); - op1->SetType("rowwise_add"); - op1->SetInput("X", {"x1"}); - op1->SetInput("b", {"b1"}); - op1->SetOutput("Out", {"out1"}); - - f::OpDesc *op2 = block->AppendOp(); - op2->SetType("mul"); - op2->SetInput("X", {"out1"}); - op2->SetInput("Y", {"y2"}); - op2->SetOutput("Out", {"out2"}); - - f::OpDesc *op3 = block->AppendOp(); - op3->SetType("rowwise_add"); - op3->SetInput("X", {"out1"}); - op3->SetInput("b", {"b3"}); - op3->SetOutput("Out", {"out3"}); - - auto target = f::VarDesc("out3"); - target.SetShape({1}); - size_t forward_len = block->AllOps().size(); - auto var_to_grad = - AppendBackward(program, target, std::unordered_set{}); - - ASSERT_EQ(block->AllOps().size(), 8UL); - f::OpDesc *fill_op = block->AllOps()[forward_len]; - EXPECT_EQ(fill_op->Type(), "fill_constant"); - - f::OpDesc *grad_op3 = block->AllOps()[4]; - ASSERT_EQ(grad_op3->Type(), "rowwise_add_grad"); - ASSERT_EQ(grad_op3->InputNames().size(), 1UL); - ASSERT_EQ(grad_op3->OutputNames().size(), 2UL); - EXPECT_EQ(grad_op3->Input(f::GradVarName("Out")), - std::vector({f::GradVarName("out3")})); - EXPECT_EQ(grad_op3->Output(f::GradVarName("X")), - std::vector({f::GradVarName("out1") + "@RENAME@0"})); - EXPECT_EQ(grad_op3->Output(f::GradVarName("b")), - std::vector({f::GradVarName("b3")})); - - f::OpDesc *grad_op4 = block->AllOps()[5]; - ASSERT_EQ(grad_op4->Type(), "mul_grad"); - ASSERT_EQ(grad_op4->InputNames().size(), 4UL); - ASSERT_EQ(grad_op4->OutputNames().size(), 2UL); - EXPECT_EQ(grad_op4->Input("X"), std::vector({"out1"})); - EXPECT_EQ(grad_op4->Input("Y"), std::vector({"y2"})); - EXPECT_EQ(grad_op4->Input("Out"), std::vector({"out2"})); - EXPECT_EQ(grad_op4->Input(f::GradVarName("Out")), - std::vector({f::GradVarName("out2")})); - EXPECT_EQ(grad_op4->Output(f::GradVarName("X")), - std::vector({f::GradVarName("out1") + "@RENAME@1"})); - EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), - std::vector({f::GradVarName("y2")})); - - f::OpDesc *sum_op = block->AllOps()[6]; - ASSERT_EQ(sum_op->Type(), "sum"); - ASSERT_EQ(sum_op->InputNames().size(), 1UL); - ASSERT_EQ(sum_op->OutputNames().size(), 1UL); - EXPECT_EQ(sum_op->Input("X"), - std::vector({f::GradVarName("out1") + "@RENAME@0", - f::GradVarName("out1") + "@RENAME@1"})); - EXPECT_EQ(sum_op->Output("Out"), - std::vector({f::GradVarName("out1")})); - - f::OpDesc *grad_op1 = block->AllOps()[7]; - ASSERT_EQ(grad_op1->Type(), "rowwise_add_grad"); - ASSERT_EQ(grad_op1->InputNames().size(), 1UL); - ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); - EXPECT_EQ(grad_op1->Input(f::GradVarName("Out")), - std::vector({f::GradVarName("out1")})); - EXPECT_EQ(grad_op1->Output(f::GradVarName("X")), - std::vector({f::GradVarName("x1")})); - EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), - std::vector({f::GradVarName("b1")})); - - EXPECT_EQ(var_to_grad.size(), 6UL); - EXPECT_EQ(var_to_grad.at("b3"), f::GradVarInfo(f::GradVarName("b3"), 0, 4)); - EXPECT_EQ(var_to_grad.at("y2"), f::GradVarInfo(f::GradVarName("y2"), 0, 5)); - EXPECT_EQ(var_to_grad.at("out1"), - f::GradVarInfo(f::GradVarName("out1"), 0, 6)); - EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 7)); - EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 7)); - - EXPECT_TRUE(block->HasVar(f::GradVarName("b3"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("y2"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("out1"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("x1"))); - EXPECT_TRUE(block->HasVar(f::GradVarName("b1"))); -} - -TEST(Backward, half_backward) { - f::ProgramDesc program; - f::BlockDesc *block = program.MutableBlock(0); - auto *op1 = block->AppendOp(); - op1->SetType("minus"); - op1->SetInput("X", {"a"}); - op1->SetInput("Y", {"b"}); - op1->SetOutput("Out", {"out"}); - - auto target = f::VarDesc("out"); - target.SetShape({1}); - size_t forward_len = block->AllOps().size(); - auto var_to_grad = AppendBackward(program, target, {"b"}); - f::OpDesc *fill_op = block->AllOps()[forward_len]; - EXPECT_EQ(fill_op->Type(), "fill_constant"); - auto ops = block->AllOps(); - ASSERT_EQ(3UL, ops.size()); - - EXPECT_EQ(var_to_grad.size(), 2UL); - EXPECT_EQ(var_to_grad.at("a"), - f::GradVarInfo(f::GradVarName("a"), 0, forward_len + 1)); -} diff --git a/paddle/fluid/framework/block_desc.h b/paddle/fluid/framework/block_desc.h index 873969b2a884f6d9e133fe87bf72725c36ce8b98..eef19c4f09c60b9df18f154c85c421f5bff9413f 100644 --- a/paddle/fluid/framework/block_desc.h +++ b/paddle/fluid/framework/block_desc.h @@ -92,7 +92,7 @@ class BlockDesc { /* * Remove Op and its input/output variables. - * Note that for either input or ouput variable, if it is also an input or + * Note that for either input or output variable, if it is also an input or * output variable of other ops, we should remain it. */ void RemoveOp(size_t s, size_t e); diff --git a/paddle/fluid/framework/details/broadcast_op_handle.cc b/paddle/fluid/framework/details/broadcast_op_handle.cc index dc8db33ef4e9cdff8e561a2e0763cd770c87222a..cd9bff52d98e3a35bafb9ebfd0e9fe5b0530bb29 100644 --- a/paddle/fluid/framework/details/broadcast_op_handle.cc +++ b/paddle/fluid/framework/details/broadcast_op_handle.cc @@ -48,7 +48,7 @@ void BroadcastOpHandle::RunImpl() { auto out_scope_idx = out_handle->scope_idx_; PADDLE_ENFORCE_LT(out_scope_idx, local_scopes_.size(), - "%s is not the the local_scopes ", out_handle->name_); + "%s is not in the local_scopes ", out_handle->name_); auto *s = local_scopes_[out_scope_idx]; auto out_var = s->FindVar(out_handle->name_); PADDLE_ENFORCE_EQ(out_p.which(), in_place.which(), diff --git a/paddle/fluid/framework/details/broadcast_op_handle_test.cc b/paddle/fluid/framework/details/broadcast_op_handle_test.cc index cd069df118fe2b822d6b14ff07338f6e811b060b..9bf72f03602c1533d2500bac9e3ba663a73aa832 100644 --- a/paddle/fluid/framework/details/broadcast_op_handle_test.cc +++ b/paddle/fluid/framework/details/broadcast_op_handle_test.cc @@ -57,15 +57,12 @@ class BroadcastTester : public ::testing::Test { } } - template void BroadcastInitOp(int input_scope_idx) { for (size_t j = 0; j < gpu_list_.size(); ++j) { local_scope_.push_back(&g_scope_.NewScope()); - auto* out_var = local_scope_[j]->Var("out"); - out_var->GetMutable(); + local_scope_[j]->Var("out"); } - auto* in_var = local_scope_[input_scope_idx]->Var("input"); - in_var->GetMutable(); + local_scope_[input_scope_idx]->Var("input"); bc_op_handle_ = new f::details::BroadcastOpHandle(local_scope_, gpu_list_); @@ -84,7 +81,6 @@ class BroadcastTester : public ::testing::Test { out_var_handle->name_ = "out"; out_var_handle->version_ = 2; out_var_handle->scope_idx_ = j; - out_var_handle->generated_op_ = bc_op_handle_; bc_op_handle_->AddOutput(out_var_handle); } } @@ -109,7 +105,7 @@ class BroadcastTester : public ::testing::Test { void TestBroadcastLodTensor() { int input_scope_idx = 0; - BroadcastInitOp(input_scope_idx); + BroadcastInitOp(input_scope_idx); auto in_var = local_scope_[input_scope_idx]->Var("input"); auto in_lod_tensor = in_var->GetMutable(); @@ -148,7 +144,7 @@ class BroadcastTester : public ::testing::Test { void TestBroadcastSelectedRows() { int input_scope_idx = 0; - BroadcastInitOp(input_scope_idx); + BroadcastInitOp(input_scope_idx); auto in_var = local_scope_[input_scope_idx]->Var("input"); auto in_selected_rows = in_var->GetMutable(); diff --git a/paddle/fluid/framework/details/computation_op_handle.cc b/paddle/fluid/framework/details/computation_op_handle.cc index 7a1b40c0b60a788b1f0a70e688f8fcbe427ad076..e3f8bbb72f2a1b75b6041d41496cef0efc81874f 100644 --- a/paddle/fluid/framework/details/computation_op_handle.cc +++ b/paddle/fluid/framework/details/computation_op_handle.cc @@ -14,6 +14,8 @@ #include "paddle/fluid/framework/details/computation_op_handle.h" +#include + namespace paddle { namespace framework { namespace details { @@ -33,7 +35,7 @@ void ComputationOpHandle::RunImpl() { } } - op_->Run(*scope_->FindVar("@TMP_SCOPE@")->Get(), place_); + op_->Run(*scope_->FindVar(kLocalExecScopeName)->Get(), place_); } std::string ComputationOpHandle::Name() const { return op_->Type(); } diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc index 9180903b864d03e59f55f41410b2240fa4199496..e3e7c55d153aec8ce9c25c962821b266eaa84fe4 100644 --- a/paddle/fluid/framework/details/fetch_op_handle.cc +++ b/paddle/fluid/framework/details/fetch_op_handle.cc @@ -14,6 +14,9 @@ #include "paddle/fluid/framework/details/fetch_op_handle.h" +#include +#include + namespace paddle { namespace framework { namespace details { @@ -57,7 +60,10 @@ void FetchOpHandle::RunImpl() { for (size_t i = 0; i < scopes.size(); ++i) { auto &scope = scopes[i]; - auto &t = scope->FindVar(var_name)->Get(); + auto &t = scope->FindVar(kLocalExecScopeName) + ->Get() + ->FindVar(var_name) + ->Get(); if (platform::is_gpu_place(var->place_)) { #ifdef PADDLE_WITH_CUDA TensorCopy(t, cpu, *dev_ctxes_[t.place()], &tensors_[i]); diff --git a/paddle/fluid/framework/details/gather_op_handle.cc b/paddle/fluid/framework/details/gather_op_handle.cc index 3047054d1a02097fbb8081ba59448fb62c9f6916..f9dfb2f5c698751fe85e748b1e5baed342dc5978 100644 --- a/paddle/fluid/framework/details/gather_op_handle.cc +++ b/paddle/fluid/framework/details/gather_op_handle.cc @@ -28,6 +28,12 @@ void GatherOpHandle::RunImpl() { "The number of inputs should be equal to the number of place."); PADDLE_ENFORCE_EQ(this->outputs_.size(), 1, "The number of output should be one."); + auto in_0_handle = static_cast(inputs_[0]); + auto pre_in_var = + local_scopes_[in_0_handle->scope_idx_]->FindVar(in_0_handle->name_); + PADDLE_ENFORCE(pre_in_var->IsType(), + "Currently, gather_op only can gather SelectedRows."); + auto pre_place = in_0_handle->place_; // Wait input done, this Wait is asynchronous operation for (auto *in : inputs_) { @@ -36,10 +42,6 @@ void GatherOpHandle::RunImpl() { in->generated_op_->Wait(dev_ctxes_[p]); } } - auto in_0_handle = static_cast(inputs_[0]); - auto pre_in_var = - local_scopes_[in_0_handle->scope_idx_]->FindVar(in_0_handle->name_); - auto pre_place = in_0_handle->place_; std::vector out_rows; std::vector in_tensors; @@ -110,9 +112,9 @@ void GatherOpHandle::RunImpl() { s = e; } } else if (pre_in_var->IsType()) { - // gather LoDTensor ??? + PADDLE_THROW("Currently, Var only can be SelectedRows."); } else { - PADDLE_THROW("Var should be LoDTensor or SelectedRows."); + PADDLE_THROW("Var should be SelectedRows."); } } diff --git a/paddle/fluid/framework/details/gather_op_handle_test.cc b/paddle/fluid/framework/details/gather_op_handle_test.cc index 5d105b37aa81d7541030fda310f169d518486210..3cf21553207eed7784fcaf23286e00c3480440be 100644 --- a/paddle/fluid/framework/details/gather_op_handle_test.cc +++ b/paddle/fluid/framework/details/gather_op_handle_test.cc @@ -56,15 +56,12 @@ class GatherTester : public ::testing::Test { } } - template void InitGatherOp(int input_scope_idx) { for (size_t j = 0; j < gpu_list_.size(); ++j) { local_scope_.push_back(&g_scope_.NewScope()); - auto* out_var = local_scope_[j]->Var("input"); - out_var->GetMutable(); + local_scope_[j]->Var("input"); } - auto* in_var = local_scope_[input_scope_idx]->Var("out"); - in_var->GetMutable(); + local_scope_[input_scope_idx]->Var("out"); gather_op_handle_ = new f::details::GatherOpHandle(local_scope_, gpu_list_); @@ -106,11 +103,9 @@ class GatherTester : public ::testing::Test { } } - void TestGatherLodTensor() {} - void TestGatherSelectedRows() { int output_scope_idx = 0; - InitGatherOp(output_scope_idx); + InitGatherOp(output_scope_idx); int height = kDims[0] * 2; std::vector rows{0, 1, 2, 3, 3, 0, 14, 7, 3, 1, @@ -169,21 +164,12 @@ class GatherTester : public ::testing::Test { f::details::GatherOpHandle* gather_op_handle_; }; -// TEST_F(GatherTester, TestCPUGatherTestLodTensor) { -// InitCtxOnGpu(false); -// TestGatherLodTensor(); -//} - TEST_F(GatherTester, TestCPUGatherTestSelectedRows) { InitCtxOnGpu(false); TestGatherSelectedRows(); } #ifdef PADDLE_WITH_CUDA -// TEST_F(GatherTester, TestGPUGatherTestLodTensor) { -// InitCtxOnGpu(true); -// TestGatherLodTensor(); -//} TEST_F(GatherTester, TestGPUGatherTestSelectedRows) { InitCtxOnGpu(true); diff --git a/paddle/fluid/framework/details/op_handle_base.h b/paddle/fluid/framework/details/op_handle_base.h index fedff07772eafa0ac4238194e1ccc763b0782dd3..b733817dcd8c705f8a62c7b11b4df6a00dc553b1 100644 --- a/paddle/fluid/framework/details/op_handle_base.h +++ b/paddle/fluid/framework/details/op_handle_base.h @@ -32,6 +32,8 @@ namespace details { // temporarily place it in op_handle. Tensor *GetTensorFromVar(Variable *in_var); +constexpr char kLocalExecScopeName[] = "@LCOAL_SCOPE@"; + class OpHandleBase { private: DISABLE_COPY_AND_ASSIGN(OpHandleBase); diff --git a/paddle/fluid/framework/details/ssa_graph_executor.h b/paddle/fluid/framework/details/ssa_graph_executor.h index 3b818b1a45b56351e34f9e52ec22b6d02a0c1591..a8833b7388ab907020a260d356f1484ffd227658 100644 --- a/paddle/fluid/framework/details/ssa_graph_executor.h +++ b/paddle/fluid/framework/details/ssa_graph_executor.h @@ -15,13 +15,15 @@ #pragma once #include +#include +#include + #include "paddle/fluid/framework/details/ssa_graph.h" #include "paddle/fluid/framework/feed_fetch_type.h" namespace paddle { namespace framework { namespace details { - class SSAGraphExecutor { DISABLE_COPY_AND_ASSIGN(SSAGraphExecutor); diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index 62af4c1d79ded5eaa30e4e6d43cc0d7327ae9689..1ce69ab02b09fe7ec17f479bcef97c931e853dc4 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -136,12 +136,6 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( ready_ops.clear(); }; - // Create local scopes. - for (auto &scope : local_scopes_) { - auto &local_scope = scope->NewScope(); - *scope->Var("@TMP_SCOPE@")->GetMutable() = &local_scope; - } - // Step 3. Execution while (!pending_vars.empty() || !ready_ops.empty() || !delayed_ops.empty()) { // 1. Run All Ready ops @@ -189,34 +183,10 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( PADDLE_ENFORCE(ready_ops.empty()); PADDLE_ENFORCE(delayed_ops.empty()); PADDLE_ENFORCE(blocked_by_delayed_ops.empty()); - ++computation_count_; - - auto sync_computation = [&] { - computation_count_ = 0; - // Wait All computational streams - for (auto p : this->places_) { - platform::DeviceContextPool::Instance().Get(p)->Wait(); - } - for (auto &scope : local_scopes_) { - scope->DropKids(); - } - }; // Wait FetchOps. if (!fetch_ops.empty()) { fetch_ops.clear(); - sync_computation(); - } - - if (computation_count_ == max_async_computation) { - sync_computation(); - } - - // NOTE: the temp scope can be dropped lazily if needed. - // Drop tmp scopes; - for (auto &scope : local_scopes_) { - auto &kid = *scope->Var("@TMP_SCOPE@")->GetMutable(); - kid = nullptr; } return fetch_data; diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h index 79cfc26b461a39811a9a125e5aeac3492d967386..bb5e837b135c35b5aea403496b45aab1ccc288ff 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h @@ -99,9 +99,6 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor { std::unique_ptr exception_; std::atomic running_ops_; bool allow_op_delay_; - - size_t computation_count_{0}; - size_t max_async_computation{100}; }; } // namespace details diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index a3b4a8c0829ae3324e933309b2eaea35fe571997..f97bd0827428feeb590fcad16c48f3461517a646 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -46,7 +46,8 @@ proto::VarType::Type GetDataTypeOfVar(const Variable* var) { } } -static DDim GetDims(const Scope& scope, const std::string& name) { +static DDim GetDims(const Scope& scope, const std::string& name, + bool get_actual_dim = false) { Variable* var = scope.FindVar(name); if (var == nullptr) { return DDim({-1}); @@ -55,7 +56,11 @@ static DDim GetDims(const Scope& scope, const std::string& name) { if (var->IsType()) { return var->Get().dims(); } else if (var->IsType()) { - return var->Get().GetCompleteDims(); + if (get_actual_dim) { + return var->Get().value().dims(); + } else { + return var->Get().GetCompleteDims(); + } } else { return DDim({-1}); } @@ -129,7 +134,7 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const { for (size_t i = 0; i < input.second.size(); ++i) { ss << input.second[i]; if (scope) { - ss << "[" << GetDims(*scope, input.second[i]) << "]"; + ss << "[" << GetDims(*scope, input.second[i], true) << "]"; ss << "(" << GetLoD(*scope, input.second[i]) << ")"; } if (i != input.second.size() - 1) { @@ -149,7 +154,7 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const { for (size_t i = 0; i < output.second.size(); ++i) { ss << output.second[i]; if (scope) { - ss << "[" << GetDims(*scope, output.second[i]) << "]"; + ss << "[" << GetDims(*scope, output.second[i], true) << "]"; ss << "(" << GetLoD(*scope, output.second[i]) << ")"; } if (i != output.second.size() - 1) { diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 20dcc080b696431b9972c0a972904d957f9b47d8..c1486b527d2e06d2b3f7e0f89458bf9a22564586 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/framework/parallel_executor.h" #include +#include #include #ifdef PADDLE_WITH_CUDA @@ -41,6 +42,8 @@ class ParallelExecutorPrivate { #ifdef PADDLE_WITH_CUDA std::unique_ptr nccl_ctxs_; #endif + + std::vector> var_types_; }; std::vector &ParallelExecutor::GetLocalScopes() { @@ -97,14 +100,9 @@ ParallelExecutor::ParallelExecutor( allow_op_delay)); // Step 3. Create vars in each scope; - for (auto *scope : member_->local_scopes_) { - for (auto *var : main_program.Block(0).AllVars()) { - if (scope->FindVar(var->Name()) != nullptr) { - continue; - } - - InitializeVariable(scope->Var(var->Name()), var->GetType()); - } + for (auto *var : main_program.Block(0).AllVars()) { + member_->var_types_.emplace_back(var->Name(), var->GetType(), + var->Persistable()); } } @@ -163,9 +161,42 @@ void ParallelExecutor::Run( const std::unordered_map &feed_tensors) { platform::RecordBlock b(0); SplitTensorToPlaces(feed_tensors); + + // Create local scopes. + for (auto &scope : member_->local_scopes_) { + Scope &local_scope = scope->NewScope(); + *scope->Var(details::kLocalExecScopeName)->GetMutable() = + &local_scope; + + for (auto &name_type_pair : member_->var_types_) { + if (scope->FindVar(std::get<0>(name_type_pair)) != nullptr) { + continue; + } + + if (std::get<2>(name_type_pair)) { // Persistable + InitializeVariable(scope->Var(std::get<0>(name_type_pair)), + std::get<1>(name_type_pair)); + } else { + InitializeVariable(scope->Var(std::get<0>(name_type_pair)), + std::get<1>(name_type_pair)); + } + } + } + auto fetch_data = member_->executor_->Run(fetch_tensors); *member_->global_scope_->Var(fetched_var_name)->GetMutable() = fetch_data; + + // Wait All computational streams + for (auto p : member_->places_) { + platform::DeviceContextPool::Instance().Get(p)->Wait(); + } + for (auto &scope : member_->local_scopes_) { + auto &local_scope = + *scope->Var(details::kLocalExecScopeName)->GetMutable(); + scope->DeleteScope(local_scope); + local_scope = nullptr; + } } void ParallelExecutor::SplitTensorToPlaces( diff --git a/paddle/fluid/framework/prune_test.cc b/paddle/fluid/framework/prune_test.cc index 0e44b34383027ef58a033eb082f4bb2118b5d8a3..8af7d2d510d36e4c24ce3ae8dbc13c24ad5d4a0f 100644 --- a/paddle/fluid/framework/prune_test.cc +++ b/paddle/fluid/framework/prune_test.cc @@ -14,18 +14,17 @@ limitations under the License. */ #include "paddle/fluid/framework/prune.h" +#include +#include + #include "paddle/fluid/framework/attribute.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/net_op.h" #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/program_desc.h" -#include - namespace f = paddle::framework; -namespace ops = paddle::operators; void AddOp(const std::string &type, const f::VariableNameMap &inputs, const f::VariableNameMap &outputs, f::AttributeMap attrs, diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index f417f62f3f75360f4ae1b7795608ae95200cfeb8..e53bcf2384e54e21c7dd5638f3b7469a35b571bf 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -1,4 +1,4 @@ -set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor prune init) +set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor init) cc_library(paddle_fluid_api SRCS io.cc @@ -11,7 +11,7 @@ cc_library(paddle_fluid DEPS ${fluid_modules}) # Create shared library cc_library(paddle_fluid_shared SHARED SRCS io.cc - DEPS ARCHIVE_START ${GLOB_OP_LIB} ${FLUID_CORE_MODULES} ARCHIVE_END) + DEPS ${fluid_modules}) set_target_properties(paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid) if(NOT APPLE) # TODO(liuyiqun): Temporarily disable the link flag because it is not support on Mac. diff --git a/paddle/fluid/inference/io.cc b/paddle/fluid/inference/io.cc index a5b62ef322bfad0fc956d7d722797bd5add6aea6..a29d457b6fa9d0e8297252c8ff1117013d2055f8 100644 --- a/paddle/fluid/inference/io.cc +++ b/paddle/fluid/inference/io.cc @@ -17,10 +17,16 @@ limitations under the License. */ #include #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/feed_fetch_type.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/pybind/pybind.h" namespace paddle { namespace inference { +// Temporarilly add this function for exposing framework::InitDevices() when +// linking the inference shared library. +void Init(bool init_p2p) { framework::InitDevices(init_p2p); } + void ReadBinaryFile(const std::string& filename, std::string& contents) { std::ifstream fin(filename, std::ios::in | std::ios::binary); PADDLE_ENFORCE(static_cast(fin), "Cannot open file %s", filename); diff --git a/paddle/fluid/inference/io.h b/paddle/fluid/inference/io.h index d07d315b93ef10a464080899b1cb9920abe83be3..756c936b33ad55e2994542b171b945e248ba2e21 100644 --- a/paddle/fluid/inference/io.h +++ b/paddle/fluid/inference/io.h @@ -18,12 +18,15 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/executor.h" +#include "paddle/fluid/framework/init.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" namespace paddle { namespace inference { +void Init(bool init_p2p); + void LoadPersistables(framework::Executor& executor, framework::Scope& scope, const framework::ProgramDesc& main_program, const std::string& dirname, diff --git a/paddle/fluid/inference/tests/book/CMakeLists.txt b/paddle/fluid/inference/tests/book/CMakeLists.txt index 86e36f3f65cb79b23e5bb23d7d8b2b34a000193c..97d9f03f88ad3e851a2dd4256d34e8ca76fdfb01 100644 --- a/paddle/fluid/inference/tests/book/CMakeLists.txt +++ b/paddle/fluid/inference/tests/book/CMakeLists.txt @@ -17,7 +17,7 @@ function(inference_test TARGET_NAME) string(REGEX REPLACE "^_$" "" arg "${arg}") cc_test(test_inference_${TARGET_NAME}${arg} SRCS test_inference_${TARGET_NAME}.cc - DEPS ARCHIVE_START paddle_fluid ARCHIVE_END + DEPS paddle_fluid ARGS --dirname=${PYTHON_TESTS_DIR}/book/${TARGET_NAME}${arg}.inference.model) set_tests_properties(test_inference_${TARGET_NAME}${arg} PROPERTIES DEPENDS test_${TARGET_NAME}) diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index 5ff987ad8b3ba3c9195e87e6c11e70ac98fa0a11..3c8696b508443e1b8d9f7cac6336b70562ffedc5 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -100,7 +100,7 @@ function(op_library TARGET) endif() # Define operators that don't need pybind here. - foreach(manual_pybind_op "net_op" "compare_op" "logical_op" "nccl_op" "tensor_array_read_write_op") + foreach(manual_pybind_op "compare_op" "logical_op" "nccl_op" "tensor_array_read_write_op") if ("${TARGET}" STREQUAL "${manual_pybind_op}") set(pybind_flag 1) endif() @@ -199,7 +199,6 @@ else() set(DEPS_OPS ${DEPS_OPS} send_op prefetch_op recv_op listen_and_serv_op send_vars_op send_barrier_op) endif() -op_library(cond_op DEPS framework_proto tensor net_op) op_library(cross_entropy_op DEPS cross_entropy) op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax) op_library(softmax_op DEPS softmax) @@ -259,7 +258,6 @@ endforeach() set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library") cc_test(gather_test SRCS gather_test.cc DEPS tensor) -cc_test(net_op_test SRCS net_op_test.cc DEPS net_op) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) cc_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc DEPS lod_tensor) cc_test(beam_search_op_test SRCS beam_search_op_test.cc DEPS lod_tensor beam_search_op) diff --git a/paddle/fluid/operators/concat_op.cc b/paddle/fluid/operators/concat_op.cc index d65a7b34678cda38d5f8beb9154d61928f517ce0..4a36b03cb63ac3ea61be1bbc56b8dd0adbe7d334 100644 --- a/paddle/fluid/operators/concat_op.cc +++ b/paddle/fluid/operators/concat_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/concat_op.h" + #include #include @@ -34,7 +35,10 @@ class ConcatOp : public framework::OperatorWithKernel { size_t axis = static_cast(ctx->Attrs().Get("axis")); const size_t n = ins.size(); - PADDLE_ENFORCE_GT(n, 1, "Input tensors count should > 1."); + PADDLE_ENFORCE_GT(n, 0, "Input tensors count should > 0."); + if (n == 1) { + VLOG(3) << "Warning: concat op have only one input, may waste memory"; + } auto out_dims = ins[0]; size_t in_zero_dims_size = out_dims.size(); diff --git a/paddle/fluid/operators/cond_op.cc b/paddle/fluid/operators/cond_op.cc deleted file mode 100644 index 15dce9e3e28fa0200e332534f42752838da4db92..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/cond_op.cc +++ /dev/null @@ -1,235 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/cond_op.h" -#include "paddle/fluid/operators/gather.h" -#include "paddle/fluid/operators/scatter.h" -#include "paddle/fluid/platform/device_context.h" - -namespace paddle { -namespace operators { - -using Scope = framework::Scope; -using Variable = framework::Variable; -using Tensor = framework::Tensor; -using LoDTensor = framework::LoDTensor; -using DDim = framework::DDim; - -framework::Scope& CondOp::AddSubScope(const Scope& scope) const { - auto sub_scopes_var = scope.FindVar("SubScopes"); - PADDLE_ENFORCE_NOT_NULL(sub_scopes_var, - "Output(SubScopes) of CondOp should not be null."); - auto sub_scopes = sub_scopes_var->GetMutable>(); - auto& sub_scope = scope.NewScope(); - sub_scopes->push_back(&sub_scope); - return sub_scope; -} - -std::vector& CondOp::GetSubScopes( - const framework::Scope& scope) const { - auto sub_scopes_var = scope.FindVar("SubScopes"); - PADDLE_ENFORCE_NOT_NULL(sub_scopes_var, - "Output(SubScopes) of CondOp should not be null."); - return *sub_scopes_var->GetMutable>(); -} - -LoDTensor& CondOp::AddIndexTensor(const Scope& scope) const { - auto index_tensors_var = scope.FindVar("IndexTensors"); - PADDLE_ENFORCE_NOT_NULL(index_tensors_var, - "Output(IndexTensors) of CondOp should not be null."); - auto& index_tensors = - *index_tensors_var->GetMutable>(); - index_tensors.push_back(LoDTensor()); - return index_tensors.back(); -} - -std::vector& CondOp::GetIndexTensors( - const framework::Scope& scope) const { - auto* index_tensors_var = scope.FindVar("IndexTensors"); - PADDLE_ENFORCE_NOT_NULL(index_tensors_var, - "Output(IndexTensors) of CondOp should not be null."); - return *index_tensors_var->GetMutable>(); -} - -void CondOp::PrepareDataForSubnet( - const framework::Scope& scope, - const platform::DeviceContext& dev_ctx) const { - PADDLE_ENFORCE(!Inputs("Xs").empty(), "Inputs(Xs) of CondOp can't be empty."); - - for (int i = 0; i < BRANCH_NUM; ++i) { - // Create two sub scopes for true and false branches - // sub_scopes[0] for the true branch - // sub_scopes[1] for the false branch - AddSubScope(scope); - // Create two tensors for true and false indices: - // index_tensors[0] for the true branch - // index_tensors[1] for the false branch - AddIndexTensor(scope); - } - - Variable* cond_var = scope.FindVar(Input("Cond")); - PADDLE_ENFORCE_NOT_NULL(cond_var, - "Input(Cond) of CondOp should not be null."); - const LoDTensor* cond = cond_var->GetMutable(); - - // get the true/false index at runtime according to cond tensor - // index_vectors[0]: vector, contains all index for cond[i] == true - // index_vectors[1]: vector, contains all index for cond[i] == false - std::vector> index_vectors; - index_vectors.resize(BRANCH_NUM); - - const int* cond_data = cond->data(); - for (int i = 0; i < cond->dims()[0]; ++i) { - if (cond_data[i]) - index_vectors[TRUE_BRANCH].push_back(i); - else - index_vectors[FALSE_BRANCH].push_back(i); - } - - // put index_vectors[0] and index_vectors[1] into two tensors: - // index_tensors[0] and index_tensors[1] - std::vector& index_tensors = GetIndexTensors(scope); - std::vector& sub_scopes = GetSubScopes(scope); - - for (int i = 0; i < BRANCH_NUM; ++i) { - DDim dim = {static_cast(index_vectors[i].size())}; - int* index_tensor_data_ptr = - index_tensors[i].mutable_data(dim, platform::CPUPlace()); - memcpy(index_tensor_data_ptr, index_vectors[i].data(), - dim[0] * sizeof(int)); - } - - // create input in subscopes according to index_vectors - for (auto& input : Inputs("Xs")) { - Variable* var_parent = scope.FindVar(input); - PADDLE_ENFORCE_NOT_NULL(var_parent); - const auto* tensor_parent = &var_parent->Get(); - - for (int i = 0; i < BRANCH_NUM; ++i) { - Variable* var_child = sub_scopes[i]->FindVar(input); - PADDLE_ENFORCE_NOT_NULL(var_child); - auto* tensor_child = var_child->GetMutable(); - - // Resize child - DDim dim = tensor_parent->dims(); - dim[0] = index_tensors[i].dims()[0]; - tensor_child->mutable_data(dim, platform::CPUPlace()); - - CPUGather(dev_ctx, *tensor_parent, index_tensors[i], tensor_child); - } - } - - // create output_tensors in subscope for sub_net - for (int i = 0; i < BRANCH_NUM; ++i) { - for (auto& output : (*sub_net_op_[i]).Outputs()) { - for (auto& var_name : output.second) { - sub_scopes[i]->Var(var_name); - } - } - } -} - -void CondOp::MergeDataFromSubnet(const framework::Scope& scope, - const platform::DeviceContext& dev_ctx) const { - std::vector& sub_scopes = GetSubScopes(scope); - const std::vector& index_tensors = - GetIndexTensors(scope); - - // Infer the output dim, out_dim[0] = true_dim[0] + false_dim[0] - PADDLE_ENFORCE(!Outputs("Outs").empty(), - "Outputs(Outs) of CondOp can't be empty."); - for (auto& output : Outputs("Outs")) { - const LoDTensor* tensor_t_out = - &sub_scopes[TRUE_BRANCH]->FindVar(output)->Get(); - PADDLE_ENFORCE_NOT_NULL(tensor_t_out, "True output should not be NULL"); - const LoDTensor* tensor_f_out = - &sub_scopes[FALSE_BRANCH]->FindVar(output)->Get(); - PADDLE_ENFORCE_NOT_NULL(tensor_f_out, "False output should not be NULL"); - - auto* var_out = scope.FindVar(output); - PADDLE_ENFORCE_NOT_NULL(var_out, "Output not found"); - LoDTensor* tensor_out = var_out->GetMutable(); - PADDLE_ENFORCE_NOT_NULL(tensor_t_out, - "True output tensor should not be NULL"); - - DDim true_dim = tensor_t_out->dims(); - DDim false_dim = tensor_f_out->dims(); - true_dim[0] = 0; - false_dim[0] = 0; - PADDLE_ENFORCE_EQ(true_dim, false_dim, - "Outputs not of the same shape except the first dim"); - - DDim out_dim = tensor_t_out->dims(); - out_dim[0] = tensor_t_out->dims()[0] + tensor_f_out->dims()[0]; - tensor_out->Resize(out_dim); - tensor_out->mutable_data(platform::CPUPlace()); - } - - // merge output results: - // output_tensor = true_output_tensor + false_output_tensor - for (auto& output : Outputs("Outs")) { - Variable* var_parent = scope.FindVar(output); - PADDLE_ENFORCE_NOT_NULL(var_parent); - auto* tensor_parent = var_parent->GetMutable(); - - for (int i = 0; i < BRANCH_NUM; ++i) { - Variable* var_child = sub_scopes[i]->FindVar(output); - PADDLE_ENFORCE_NOT_NULL(var_child); - auto* tensor_child = &var_child->Get(); - ScatterAssign(dev_ctx, *tensor_child, index_tensors[i], - tensor_parent); - } - } -} - -void CondOp::RunImpl(const Scope& scope, const platform::Place& place) const { - // get device context from pool - platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); - auto& dev_ctx = *pool.Get(place); - - PrepareDataForSubnet(scope, dev_ctx); - std::vector& sub_scopes = GetSubScopes(scope); - for (int i = 0; i < BRANCH_NUM; ++i) { - sub_net_op_[i]->Run(*sub_scopes[i], place); - } - MergeDataFromSubnet(scope, dev_ctx); -} - -class CondOpProtoAndCheckerMaker : public framework::OpProtoAndCheckerMaker { - public: - CondOpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("Cond", "The condition, which is a bool vector"); - AddInput("Xs", "Inputs of Subnets").AsDuplicable(); - AddOutput("Outs", "Outputs of Cond_Op after merge").AsDuplicable(); - - AddOutput("SubScopes", "sub scopes for true and false branches"); - AddOutput("IndexTensors", "Index Tensors contains indices for true/false"); - - AddComment(R"DOC( -Sample Dependent Conditional Operator. - -Given Cond[i] as a 1/0 vector to indicate true/false: -Out[i] = subnet_true[i], if Cond[i] == true -Out[i] = subnet_false[i], if Cond[i] == false - -)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OP_WITHOUT_GRADIENT(cond, paddle::operators::CondOp, - paddle::operators::CondOpProtoAndCheckerMaker); diff --git a/paddle/fluid/operators/cond_op.h b/paddle/fluid/operators/cond_op.h deleted file mode 100644 index d3888923dbdeee122fb3045a839c0ba639b892b1..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/cond_op.h +++ /dev/null @@ -1,96 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include -#include -#include "glog/logging.h" -#include "paddle/fluid/framework/ddim.h" -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/operators/net_op.h" - -namespace paddle { -namespace operators { - -/* - * @brief CondOp is a dynamic if-else Operator - * - * It has a input tensor named cond indicating which netop each instance will - * run. - * - * if cond == 1, it will run true_net, which is a NetOp. - * - * if cond == 0, it will run false_net, which is another NetOp. - */ -class CondOp : public framework::OperatorBase { - public: - CondOp(const std::string& type, const framework::VariableNameMap& inputs, - const framework::VariableNameMap& outputs, - const framework::AttributeMap& attrs) - : OperatorBase(type, inputs, outputs, attrs) { - sub_net_op_.resize(BRANCH_NUM); - } - - CondOp(const CondOp& o) - : framework::OperatorBase( - static_cast(o)) { - // TODO(yuyang18): Implement copy ctor well. - PADDLE_THROW("Not implemented"); - } - - framework::Scope& AddSubScope(const framework::Scope& scope) const; - std::vector& GetSubScopes( - const framework::Scope& scope) const; - - framework::LoDTensor& AddIndexTensor(const framework::Scope& scope) const; - std::vector& GetIndexTensors( - const framework::Scope& scope) const; - - void PrepareDataForSubnet(const framework::Scope& scope, - const platform::DeviceContext& dev_ctx) const; - void MergeDataFromSubnet(const framework::Scope& scope, - const platform::DeviceContext& dev_ctx) const; - - /* - * Set True Block - */ - void set_truenet(std::unique_ptr&& net) { - sub_net_op_[TRUE_BRANCH] = std::move(net); - } - - /* - * Set False Block - */ - void set_falsenet(std::unique_ptr&& net) { - sub_net_op_[FALSE_BRANCH] = std::move(net); - } - - private: - void RunImpl(const framework::Scope& scope, - const platform::Place& place) const override; - - private: - const int TRUE_BRANCH = 0; - const int FALSE_BRANCH = 1; - const int BRANCH_NUM = 2; - - // sub_net_op_[0]: subnet_t - // sub_net_op_[1]: subnet_f - std::vector> sub_net_op_; -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/ctc_align_op.cu b/paddle/fluid/operators/ctc_align_op.cu index 54e0b1d9ad83c5f01f3f0dfbc2a95c642c0aaadc..bbad74e96d9c6c1be24639b63e472f18a599cfab 100644 --- a/paddle/fluid/operators/ctc_align_op.cu +++ b/paddle/fluid/operators/ctc_align_op.cu @@ -15,6 +15,7 @@ limitations under the License. */ #include #include #include +#include #include "paddle/fluid/operators/ctc_align_op.h" namespace paddle { diff --git a/paddle/fluid/operators/ctc_align_op.h b/paddle/fluid/operators/ctc_align_op.h index 70698d99589ae9e2e18ec8b1c1bb3bc8c7476131..9c5c6f5aa03632fe3079074d4b164f871fad634d 100644 --- a/paddle/fluid/operators/ctc_align_op.h +++ b/paddle/fluid/operators/ctc_align_op.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include +#include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/math_function.h" diff --git a/paddle/fluid/operators/detail/grpc_server.cc b/paddle/fluid/operators/detail/grpc_server.cc index d5fc163bc25409e0607b149b61c6266b38119d9d..0b582a08bc0bfbcfdc8f338a6add8edaa5e80818 100644 --- a/paddle/fluid/operators/detail/grpc_server.cc +++ b/paddle/fluid/operators/detail/grpc_server.cc @@ -161,6 +161,7 @@ class RequestPrefetch final : public RequestBase { ::grpc::ByteBuffer reply; std::string var_name = request_->OutVarname(); + VLOG(3) << "prefetch var " << var_name; auto var_desc = program_->Block(0).FindVar(var_name); framework::Scope* local_scope = &scope_->NewScope(); auto* var = local_scope->FindVar(var_name); diff --git a/paddle/fluid/operators/elementwise_op.h b/paddle/fluid/operators/elementwise_op.h index f04d8d8fd82ed2336dff9c5b88808dc32de6630a..a33634ab2503f988a8a692682ddb238d4794a3c0 100644 --- a/paddle/fluid/operators/elementwise_op.h +++ b/paddle/fluid/operators/elementwise_op.h @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" @@ -106,18 +107,18 @@ information. However, the output only shares the LoD information with input $X$. protected: std::string comment_; - void Replace(std::string& src, std::string from, std::string to) { + void Replace(std::string* src, std::string from, std::string to) { std::size_t len_from = std::strlen(from.c_str()); std::size_t len_to = std::strlen(to.c_str()); - for (std::size_t pos = src.find(from); pos != std::string::npos; - pos = src.find(from, pos + len_to)) { - src.replace(pos, len_from, to); + for (std::size_t pos = src->find(from); pos != std::string::npos; + pos = src->find(from, pos + len_to)) { + src->replace(pos, len_from, to); } } void SetComment(std::string name, std::string equation) { - Replace(comment_, "{name}", name); - Replace(comment_, "{equation}", equation); + Replace(&comment_, "{name}", name); + Replace(&comment_, "{equation}", equation); } }; diff --git a/paddle/fluid/operators/gru_op.cc b/paddle/fluid/operators/gru_op.cc index 2a91dcbcd418fcd61445b7d744789bdeee11d2f2..2490b83b8c50ce4a68095be10d78a380174c1a3f 100644 --- a/paddle/fluid/operators/gru_op.cc +++ b/paddle/fluid/operators/gru_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/gru_op.h" +#include namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/gru_op.h b/paddle/fluid/operators/gru_op.h index 0886bebc41d8b0f28745e88685f3954f86c823a1..1d5c291495c0f0c0d8da9ff6949888b4cbb6036d 100644 --- a/paddle/fluid/operators/gru_op.h +++ b/paddle/fluid/operators/gru_op.h @@ -13,15 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once - +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/detail/activation_functions.h" #include "paddle/fluid/operators/math/gru_compute.h" #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/sequence2batch.h" -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" - namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/im2sequence_op.cc b/paddle/fluid/operators/im2sequence_op.cc index 048391549dd8df24cc215d04431c306ac4c7e5be..5b387d8d344dfc3475a537827acd9e125fe6693c 100644 --- a/paddle/fluid/operators/im2sequence_op.cc +++ b/paddle/fluid/operators/im2sequence_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/im2sequence_op.h" +#include namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/im2sequence_op.h b/paddle/fluid/operators/im2sequence_op.h index a6a83fefbc6266fa718dcad78b3a018526f124db..d792c68f784d8ffec0eb303a6ab9b59c9f121fa7 100644 --- a/paddle/fluid/operators/im2sequence_op.h +++ b/paddle/fluid/operators/im2sequence_op.h @@ -13,7 +13,7 @@ limitations under the License. */ #pragma once - +#include #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/label_smooth_op.cc b/paddle/fluid/operators/label_smooth_op.cc index eef25f8a06ddb3311f3cfea21b64d8f7d7e58f24..c2a8c7f867a4483a7fda2f4336a64ab109ce86e8 100644 --- a/paddle/fluid/operators/label_smooth_op.cc +++ b/paddle/fluid/operators/label_smooth_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/label_smooth_op.h" +#include namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/linear_chain_crf_op.h b/paddle/fluid/operators/linear_chain_crf_op.h index 800a1303e1a427e7bd5e6c04354b8a5fbd816712..d5162bcd742c05980c89394b5d011bd078b61211 100644 --- a/paddle/fluid/operators/linear_chain_crf_op.h +++ b/paddle/fluid/operators/linear_chain_crf_op.h @@ -100,7 +100,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel { auto x_row_max = EigenMatrix::From(emission_row_max); x_row_max.device(place) = x.maximum(Eigen::DSizes(1)) - .reshape(Eigen::DSizes(int(batch_size), 1)); + .reshape(Eigen::DSizes(static_cast(batch_size), 1)); auto x_exps = EigenMatrix::From(*emission_exps); x_exps.device(place) = diff --git a/paddle/fluid/operators/listen_and_serv_op.cc b/paddle/fluid/operators/listen_and_serv_op.cc index 9188f2d989e601b7a97dedaf71f7080829cdb7c3..5d293665f0bcc098126ad3ec6c9bf34ff54c3b6f 100644 --- a/paddle/fluid/operators/listen_and_serv_op.cc +++ b/paddle/fluid/operators/listen_and_serv_op.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include +#include // NOLINT +#include #include "paddle/fluid/operators/listen_and_serv_op.h" @@ -88,8 +89,9 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, auto ins = Inputs("X"); auto fan_in = Attr("Fanin"); - auto *block = Attr(kOptimizeBlock); - auto *program = block->Program(); + auto *optimize_block = Attr(kOptimizeBlock); + auto *prefetch_block = Attr(kPrefetchBlock); + auto *program = optimize_block->Program(); size_t num_blocks = program->Size(); PADDLE_ENFORCE_GE(num_blocks, 2, "server program should have at least 2 blocks"); @@ -97,18 +99,25 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, framework::Executor executor(dev_place); std::vector block_list; for (size_t blkid = 1; blkid < num_blocks; ++blkid) { - block_list.push_back(blkid); + if (blkid != prefetch_block->ID()) { + block_list.push_back(blkid); + } } - auto prepared = executor.Prepare(*program, block_list); + auto optimize_prepared = executor.Prepare(*program, block_list); // Insert placeholder for block0 which holds current op itself. - prepared.insert(prepared.begin(), - std::shared_ptr(nullptr)); + optimize_prepared.insert( + optimize_prepared.begin(), + std::shared_ptr(nullptr)); rpc_service_->SetScope(&recv_scope); rpc_service_->SetDevCtx(&dev_ctx); // TODO(qiao) set proper fields for table lookup and update rpc_service_->SetExecutor(&executor); - rpc_service_->SetPrefetchBlkdId(0); + VLOG(3) << "prefetch block id is " << prefetch_block->ID(); + auto prefetch_prepared = executor.Prepare(*program, prefetch_block->ID()); + rpc_service_->SetPrefetchBlkdId(prefetch_block->ID()); + rpc_service_->SetPrefetchPreparedCtx(prefetch_prepared.get()); + prefetch_prepared.release(); rpc_service_->SetProgram(program); // start the server listening after all member initialized. server_thread_.reset(new std::thread(RunServer, rpc_service_)); @@ -166,16 +175,18 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, parallel_blkids.push_back(1); double ts = detail::GetTimestamp(); for (size_t blkid = 2; blkid < num_blocks; ++blkid) { - if (program->Block(blkid).Parent() != last_parent_blkid) { - ParallelExecuteBlocks(parallel_blkids, &executor, prepared, program, - &recv_scope); - parallel_blkids.clear(); - last_parent_blkid = program->Block(blkid).Parent(); + if (blkid != prefetch_block->ID()) { + if (program->Block(blkid).Parent() != last_parent_blkid) { + ParallelExecuteBlocks(parallel_blkids, &executor, optimize_prepared, + program, &recv_scope); + parallel_blkids.clear(); + last_parent_blkid = program->Block(blkid).Parent(); + } + parallel_blkids.push_back(blkid); } - parallel_blkids.push_back(blkid); } - ParallelExecuteBlocks(parallel_blkids, &executor, prepared, program, - &recv_scope); + ParallelExecuteBlocks(parallel_blkids, &executor, optimize_prepared, + program, &recv_scope); VLOG(2) << "run all blocks spent " << detail::GetTimestamp() - ts << "(ms)"; // Reset the received sparse variables, the sum operator would not @@ -211,6 +222,8 @@ from send_op and send back variables to recv_op. .AddCustomChecker([](const std::string &ip) { return !ip.empty(); }); AddAttr(kOptimizeBlock, "BlockID to run on server side."); + AddAttr(kPrefetchBlock, + "prefetch block to run on server side."); AddAttr("Fanin", "How many clients send to this server.") .SetDefault(1); } diff --git a/paddle/fluid/operators/listen_and_serv_op.h b/paddle/fluid/operators/listen_and_serv_op.h index 0da87afc961e896f04b4f0028bf9b17d5e992548..759b2a462ba5b938991aa86be9b9dc3e59fe3f7e 100644 --- a/paddle/fluid/operators/listen_and_serv_op.h +++ b/paddle/fluid/operators/listen_and_serv_op.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include +#include #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/lod_tensor.h" @@ -27,6 +28,7 @@ namespace paddle { namespace operators { constexpr char kOptimizeBlock[] = "OptimizeBlock"; +constexpr char kPrefetchBlock[] = "PrefetchBlock"; void RunServer(std::shared_ptr service); diff --git a/paddle/fluid/operators/logical_op.cc b/paddle/fluid/operators/logical_op.cc index 6a7db31cf36f31064259abeb0348e682be9f917c..41aa00ee8ac10e0776c066fc3c37f97b0dd40cc3 100644 --- a/paddle/fluid/operators/logical_op.cc +++ b/paddle/fluid/operators/logical_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/logical_op.h" +#include #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/lookup_table_op.cc b/paddle/fluid/operators/lookup_table_op.cc index bf33be310686640fa187a07cf46a157b7f433340..5e59bd1b178ad1803f6f70c5f3f9fd7af495ac3c 100644 --- a/paddle/fluid/operators/lookup_table_op.cc +++ b/paddle/fluid/operators/lookup_table_op.cc @@ -78,6 +78,9 @@ class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker { "(boolean, default false) " "Sparse update.") .SetDefault(false); + AddAttr("is_distributed", + "(boolean, default false) distributed lookup table.") + .SetDefault(false); AddAttr("padding_idx", "(int64, default -1) " "If the value is -1, it makes no effect to lookup. " diff --git a/paddle/fluid/operators/lrn_op.cc b/paddle/fluid/operators/lrn_op.cc index cb1568398125bbb57da974096da527200c1e0975..553a06c3dcdbb9de43afcace75ebec7c5e819d4a 100644 --- a/paddle/fluid/operators/lrn_op.cc +++ b/paddle/fluid/operators/lrn_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/lrn_op.h" +#include #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" #endif diff --git a/paddle/fluid/operators/lstm_op.cc b/paddle/fluid/operators/lstm_op.cc index d75537741ef1d13b61ad6e244b2bba1ae5509da5..e062d62c66c25e386c7643e310034bc1481ec43d 100644 --- a/paddle/fluid/operators/lstm_op.cc +++ b/paddle/fluid/operators/lstm_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/lstm_op.h" +#include namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/lstm_op.h b/paddle/fluid/operators/lstm_op.h index 11f9f223b5d9a8091c51c93cee3f9c23b62e5573..a1ef0eb278dea7205cd8052bbe006b0ae4e3a466 100644 --- a/paddle/fluid/operators/lstm_op.h +++ b/paddle/fluid/operators/lstm_op.h @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/detail/activation_functions.h" #include "paddle/fluid/operators/math/lstm_compute.h" diff --git a/paddle/fluid/operators/lstm_unit_op.cu b/paddle/fluid/operators/lstm_unit_op.cu index 76245a1b5a9c8ba9c7ee7d7c03a95e2595a01591..acf094238fff92711edf00b4180266138362add1 100644 --- a/paddle/fluid/operators/lstm_unit_op.cu +++ b/paddle/fluid/operators/lstm_unit_op.cu @@ -18,6 +18,7 @@ https://github.com/caffe2/caffe2/blob/master/caffe2/operators/lstm_unit_op_gpu.c #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/cross_entropy_op.h" +#include "paddle/fluid/operators/lstm_unit_op.h" #include "paddle/fluid/platform/assert.h" #include "paddle/fluid/platform/hostdevice.h" diff --git a/paddle/fluid/operators/lstmp_op.cc b/paddle/fluid/operators/lstmp_op.cc index a881ef82ec3cefa826f5f0856cc4fc13c7d7afc0..82541517e122d5da2674b55561ba72af970a2567 100644 --- a/paddle/fluid/operators/lstmp_op.cc +++ b/paddle/fluid/operators/lstmp_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/lstmp_op.h" +#include namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/lstmp_op.h b/paddle/fluid/operators/lstmp_op.h index dfa7f74d5116b4e3f1508f8bef94c598711e8124..172db548960135fbc1841cf58b73894d4f74d838 100644 --- a/paddle/fluid/operators/lstmp_op.h +++ b/paddle/fluid/operators/lstmp_op.h @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include #include "paddle/fluid/operators/activation_op.h" #include "paddle/fluid/operators/math/detail/activation_functions.h" #include "paddle/fluid/operators/math/lstm_compute.h" diff --git a/paddle/fluid/operators/matmul_op.cc b/paddle/fluid/operators/matmul_op.cc index 85855928521b8b4cc5e8746b0b5f841cc2587618..1f5255887391218b766aa23842e443c8b2ad080f 100644 --- a/paddle/fluid/operators/matmul_op.cc +++ b/paddle/fluid/operators/matmul_op.cc @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/matmul_op.h" +#include +#include namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/matmul_op.h b/paddle/fluid/operators/matmul_op.h index 1cd8fe55dcbd23eae771550a363bf0a07e9bf585..f2e9cfdcdbf93326ae193776a7d5f6a324373603 100644 --- a/paddle/fluid/operators/matmul_op.h +++ b/paddle/fluid/operators/matmul_op.h @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once - +#include +#include +#include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/matmul.h" diff --git a/paddle/fluid/operators/maxout_op.cc b/paddle/fluid/operators/maxout_op.cc index efaae7d5f2d20484d90f79d9e13ec2f5ed6e06c9..4e28d98834d27351be99106d6760eae46baf8938 100644 --- a/paddle/fluid/operators/maxout_op.cc +++ b/paddle/fluid/operators/maxout_op.cc @@ -13,6 +13,8 @@ * limitations under the License. */ #include "paddle/fluid/operators/maxout_op.h" +#include + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/minus_op.cc b/paddle/fluid/operators/minus_op.cc index 7de9d94979fdc3f3352c556cc8b655ad4bc7e201..a302b24560e680076d62d02b422c6410467deb1d 100644 --- a/paddle/fluid/operators/minus_op.cc +++ b/paddle/fluid/operators/minus_op.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/minus_op.h" -#include "paddle/fluid/operators/net_op.h" + +#include +#include namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/momentum_op.cu b/paddle/fluid/operators/momentum_op.cu index da4a6af298f61a20e60ff1b8358f30bb0aca2280..5eb9d9950248bb50bb823f071c7fff0ddcc47234 100644 --- a/paddle/fluid/operators/momentum_op.cu +++ b/paddle/fluid/operators/momentum_op.cu @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/momentum_op.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/mul_op.cc b/paddle/fluid/operators/mul_op.cc index 90af1e2d602ac039b4d98a69a889ff8b1b85ffc6..5038287527c70d376d8c8a1cc8e4cca0b563126a 100644 --- a/paddle/fluid/operators/mul_op.cc +++ b/paddle/fluid/operators/mul_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/mul_op.h" +#include namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/net_op.cc b/paddle/fluid/operators/net_op.cc deleted file mode 100644 index 0c2da744177b602246719d701257fc1b509ad81e..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/net_op.cc +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/operators/net_op.h" -#include -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -const char NetOp::kAll[] = "all"; - -void NetOp::CompleteAddOp(bool calc) { - add_op_done_ = true; - if (!calc) return; - std::set input_set; - std::set output_set; - for (auto& op : ops_) { - for (auto& ipt : op->Inputs()) { - for (auto& var_name : ipt.second) { - // If input variable has been in output set, then it will be - // added into intermediate_outputs_. Otherwise, it will be - // added into input set. - if (Contains(output_set, var_name)) { - intermediate_outputs_.insert(var_name); - } else { - input_set.insert(var_name); - } - } - } - - for (auto& opt : op->Outputs()) { - for (auto& var_name : opt.second) { - output_set.insert(var_name); - } - } - } - auto& inputs = inputs_[kAll]; - inputs.reserve(input_set.size()); - std::copy(input_set.begin(), input_set.end(), std::back_inserter(inputs)); - auto& outputs = outputs_[kAll]; - outputs.reserve(output_set.size()); - std::copy(output_set.begin(), output_set.end(), std::back_inserter(outputs)); -} - -std::string NetOp::DebugStringEx(const framework::Scope* scope) const { - std::ostringstream os; - os << OperatorBase::DebugStringEx(scope) << std::endl; - for (auto& op : ops_) { - std::istringstream is(op->DebugStringEx(scope)); - for (std::string line; std::getline(is, line);) { - os << " " << line << std::endl; - } - } - return os.str(); -} - -bool NetOp::IsNetOp() const { return true; } - -std::vector NetOp::OutputVars(bool has_intermediate) const { - std::vector all; - for (auto& pair : this->outputs_) { - for (auto& var_name : pair.second) { - all.push_back(var_name); - } - } - if (has_intermediate) { - return all; - } - std::vector ret_val; - for (auto& each : all) { - if (!Contains(intermediate_outputs_, each)) { - ret_val.push_back(each); - } - } - return ret_val; -} - -NetOp::NetOp(const std::string& type, const framework::VariableNameMap& inputs, - const framework::VariableNameMap& outputs, - const framework::AttributeMap& attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - -std::unique_ptr NetOp::Clone() const { - PADDLE_ENFORCE( - add_op_done_, - "Must clone a sealed NetOp, invoke Net::CompleteAddOp before clone"); - return std::unique_ptr(new NetOp(*this)); -} - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/net_op.h b/paddle/fluid/operators/net_op.h deleted file mode 100644 index cbf8820cf4991bc24893f13646364dea0955a128..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/net_op.h +++ /dev/null @@ -1,130 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include "paddle/fluid/framework/framework.pb.h" -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -/** - * @brief Network is also a type of Operator - * - * It will manage the operators it has. - * - * Network is the container and controller of a set of operators. - - * A network object knows all Operators belonging to this network. Variables, - * which are inputs and outputs of these operators, are created and managed by a - * hierarchy of Scope objects. - * - * This is the base class of network, all the networks should implement the APIs - * it defines. - */ -class NetOp : public framework::OperatorBase { - public: - static const char kAll[]; - NetOp() - : framework::OperatorBase("plain_net", framework::VariableNameMap{}, - framework::VariableNameMap{}, - framework::AttributeMap{}) {} - - NetOp(const std::string& type, const framework::VariableNameMap& inputs, - const framework::VariableNameMap& outputs, - const framework::AttributeMap& attrs); - - NetOp(const NetOp& o) : framework::OperatorBase(o.type_, {}, {}, o.attrs_) { - this->ops_.reserve(o.ops_.size()); - std::transform( - o.ops_.begin(), o.ops_.end(), std::back_inserter(this->ops_), - [](const std::unique_ptr& op) { - return std::unique_ptr(op->Clone()); - }); - this->CompleteAddOp(); - } - - bool SupportGPU() const override { - for (auto& op : ops_) { - if (!op->SupportGPU()) { - return false; - } - } - return true; - } - - void AppendOp(const framework::OperatorBase& op) { AppendOp(op.Clone()); } - - /** - * @brief Add an operator by ptr - */ - void AppendOp(std::unique_ptr op) { - PADDLE_ENFORCE(!add_op_done_, - "Cannot AppendOp when this network is sealed"); - PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op"); - ops_.push_back(std::move(op)); - } - - void InsertOp(size_t pos, std::unique_ptr op) { - PADDLE_ENFORCE(!add_op_done_, - "Cannot InsertOp when this network is sealed"); - PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op"); - PADDLE_ENFORCE_LE(pos, ops_.size(), "Out of range"); - ops_.insert(ops_.begin() + pos, std::move(op)); - } - - void InsertOp(size_t pos, const framework::OperatorBase& op) { - InsertOp(pos, op.Clone()); - } - - void CompleteAddOp(bool calculate = true); - - std::string DebugStringEx( - const framework::Scope* scope = nullptr) const override; - - bool IsNetOp() const override; - std::vector OutputVars(bool has_intermediate) const override; - - std::unique_ptr Clone() const override; - - std::vector> ops_; - - private: - /** - * @brief Run the network. - * - * Run all the operators with the `scope`, if no scope is provided, default - * scope will be used instead. If no OpContext is provicded, default context - * will be used. - */ - void RunImpl(const framework::Scope& scope, - const platform::Place& place) const override { - for (auto& op : ops_) { - op->Run(scope, place); - } - } - - bool add_op_done_{false}; - std::set intermediate_outputs_; - - template - static bool Contains(T container, KeyType key) { - return container.find(key) != container.end(); - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/net_op_test.cc b/paddle/fluid/operators/net_op_test.cc deleted file mode 100644 index 3b5f57548585398c441fd8038ba8b053c27392cf..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/net_op_test.cc +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include "paddle/fluid/operators/net_op.h" - -#include - -namespace paddle { -namespace operators { -using Scope = framework::Scope; -using DeviceContext = platform::DeviceContext; - -static int run_cnt = 0; - -class TestOp : public framework::OperatorBase { - public: - using framework::OperatorBase::OperatorBase; - DEFINE_OP_CLONE_METHOD(TestOp); - - private: - void RunImpl(const Scope& scope, - const platform::Place& place) const override { - ++run_cnt; - } -}; - -template -void AssertSameVectorWithoutOrder(const std::vector& expected, - const std::vector& actual) { - ASSERT_EQ(expected.size(), actual.size()); - std::unordered_set expected_set; - for (auto& tmp : expected) { - expected_set.insert(tmp); - } - for (auto& act : actual) { - ASSERT_NE(expected_set.end(), expected_set.find(act)); - } -} - -TEST(OpKernel, all) { - auto net = std::make_shared(); - ASSERT_NE(net, nullptr); - - net->AppendOp(std::unique_ptr( - new TestOp("test", {{"X", {"x"}}, {"W", {"w1"}}, {"b", {"b1"}}}, - {{"Out", {"y"}}}, framework::AttributeMap{}))); - net->AppendOp(std::unique_ptr( - new TestOp("test", {{"X", {"y"}}, {"W", {"w2"}}, {"b", {"b2"}}}, - {{"Out", {"z"}}}, framework::AttributeMap{}))); - - net->CompleteAddOp(); - AssertSameVectorWithoutOrder({"x", "w1", "b1", "w2", "b2"}, - net->Inputs(NetOp::kAll)); - AssertSameVectorWithoutOrder({"y", "z"}, net->Outputs(NetOp::kAll)); - - auto final_outs = net->OutputVars(false); - - ASSERT_EQ(final_outs.size(), 1UL); - ASSERT_EQ(final_outs[0], "z"); -} - -TEST(NetOp, insert_op) { - NetOp net; - auto op1 = std::unique_ptr( - new framework::NOP("empty", {{"X", {"x"}}, {"W", {"w1"}}, {"b", {"b1"}}}, - {{"Out", {"y"}}}, framework::AttributeMap{})); - net.AppendOp(*op1); - net.InsertOp(0, *op1); - ASSERT_EQ(2UL, net.ops_.size()); - net.InsertOp(2, std::move(op1)); - ASSERT_EQ(3UL, net.ops_.size()); -} - -TEST(NetOp, Clone) { - NetOp net; - net.AppendOp(std::unique_ptr(new framework::NOP{ - "empty", framework::VariableNameMap{}, framework::VariableNameMap{}, - framework::AttributeMap{}})); - net.AppendOp(std::unique_ptr(new framework::NOP{ - "empty2", framework::VariableNameMap{}, framework::VariableNameMap{}, - framework::AttributeMap{}})); - net.CompleteAddOp(true); - auto new_net_op = net.Clone(); - ASSERT_NE(new_net_op, nullptr); - ASSERT_TRUE(new_net_op->IsNetOp()); - auto* new_net = static_cast(new_net_op.get()); - ASSERT_EQ(2UL, new_net->ops_.size()); - ASSERT_EQ(new_net->ops_[0]->Type(), "empty"); - ASSERT_EQ(new_net->ops_[1]->Type(), "empty2"); -} - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/prefetch_op.cc b/paddle/fluid/operators/prefetch_op.cc index 09ab7da663b5ef5f099b9f65b0df661ceea0d9e2..f9ae01ab5d2972d2a74b36ae6035985d1d874bb6 100644 --- a/paddle/fluid/operators/prefetch_op.cc +++ b/paddle/fluid/operators/prefetch_op.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include +#include // NOLINT #include #include "paddle/fluid/framework/data_type.h" @@ -50,8 +50,8 @@ class PrefetchOp : public framework::OperatorBase { for (size_t i = 0; i < ins.size(); i++) { if (NeedSend(scope, ins[i])) { - VLOG(3) << "sending " << ins[i] << " to " << epmap[i] << "to get " - << outs[i] << "back"; + VLOG(3) << "sending " << ins[i] << " to " << epmap[i] << " to get " + << outs[i] << " back"; rpc_client->AsyncPrefetchVariable(epmap[i], ctx, scope, ins[i], outs[i]); } else { @@ -71,7 +71,7 @@ class PrefetchOpMaker : public framework::OpProtoAndCheckerMaker { "(RPCClient) The RPC client object which will be" "initialized at most once."); AddOutput("Out", - "(SelectedRows) result " + "(LoDTensor) result " "to be fetched from parameter server") .AsDuplicable(); AddAttr>( diff --git a/paddle/fluid/operators/prelu_op.cc b/paddle/fluid/operators/prelu_op.cc index 447b854544b72043ea09c09c134af3a48c305561..7fb45bd19da3a7f0c51d8e98a52efe62c15c1c55 100644 --- a/paddle/fluid/operators/prelu_op.cc +++ b/paddle/fluid/operators/prelu_op.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/prelu_op.h" -#include "paddle/fluid/operators/net_op.h" + +#include namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/scale_op.cc b/paddle/fluid/operators/scale_op.cc index b16d06df8d0f7f57a5ec2f2be9a2cbb12a8ba55d..7ca7639fdb9b4c0fe5fe059a1cad1a22987d47e4 100644 --- a/paddle/fluid/operators/scale_op.cc +++ b/paddle/fluid/operators/scale_op.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/scale_op.h" -#include "paddle/fluid/operators/net_op.h" + +#include namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/send_recv_op_test.cc b/paddle/fluid/operators/send_recv_op_test.cc index 542bc3fde2a3616807eea560be85fb42026d5825..3bf5d57809019d3ae469471c2ee2e7aac70b9faf 100644 --- a/paddle/fluid/operators/send_recv_op_test.cc +++ b/paddle/fluid/operators/send_recv_op_test.cc @@ -14,7 +14,7 @@ limitations under the License. */ #include #include -#include +#include // NOLINT #include "gtest/gtest.h" #include "paddle/fluid/framework/op_registry.h" @@ -37,11 +37,11 @@ namespace m = paddle::operators::math; std::unique_ptr listen_and_serv_op; int selected_port; -void InitTensorsInScope(f::Scope &scope, p::CPUPlace &place) { +void InitTensorsInScope(const p::CPUPlace &place, f::Scope *scope) { p::CPUDeviceContext ctx(place); for (int i = 0; i < 2; ++i) { auto var_name = paddle::string::Sprintf("x%d", i); - auto var = scope.Var(var_name); + auto var = scope->Var(var_name); auto tensor = var->GetMutable(); tensor->Resize({10, 10}); float *expect = tensor->mutable_data(place); @@ -50,20 +50,20 @@ void InitTensorsInScope(f::Scope &scope, p::CPUPlace &place) { } } - auto out_var = scope.Var("Out"); + auto out_var = scope->Var("Out"); auto out_tensor = out_var->GetMutable(); out_tensor->Resize({10, 10}); out_tensor->mutable_data(place); // allocate } -void InitSelectedRowsInScope(f::Scope &scope, p::CPUPlace &place) { +void InitSelectedRowsInScope(const p::CPUPlace &place, f::Scope *scope) { p::CPUDeviceContext ctx(place); int64_t height = 10; int64_t row_numel = 10; m::SetConstant set_one; // init x0 std::vector rows0{0, 4, 7}; - auto x0_var = scope.Var("x0"); + auto x0_var = scope->Var("x0"); auto x0 = x0_var->GetMutable(); x0->set_rows(rows0); x0->set_height(height); @@ -74,7 +74,7 @@ void InitSelectedRowsInScope(f::Scope &scope, p::CPUPlace &place) { // init x1 std::vector rows1{2, 9}; - auto x1_var = scope.Var("x1"); + auto x1_var = scope->Var("x1"); auto x1 = x1_var->GetMutable(); x1->set_rows(rows1); x1->set_height(height); @@ -83,7 +83,7 @@ void InitSelectedRowsInScope(f::Scope &scope, p::CPUPlace &place) { f::make_ddim({static_cast(rows1.size()), row_numel}), place); set_one(ctx, x1_value, 1.0); - auto out_var = scope.Var("Out"); + auto out_var = scope->Var("Out"); auto out = out_var->GetMutable(); auto out_value = out->mutable_value(); out->set_height(height); @@ -117,15 +117,16 @@ void StartServerNet(bool is_sparse) { f::Scope scope; p::CPUPlace place; if (is_sparse) { - InitSelectedRowsInScope(scope, place); + InitSelectedRowsInScope(place, &scope); } else { - InitTensorsInScope(scope, place); + InitTensorsInScope(place, &scope); } // sub program run in listen_and_serv_op, for simple test we use sum f::ProgramDesc program; const auto &root_block = program.Block(0); auto *optimize_block = program.AppendBlock(root_block); + auto *prefetch_block = program.AppendBlock(root_block); // X for server side tensors, RX for received tensers, must be of same shape. AddOp("sum", {{"X", {"x0", "x1"}}}, {{"Out", {"Out"}}}, {}, optimize_block); @@ -135,6 +136,7 @@ void StartServerNet(bool is_sparse) { attrs.insert({"ParamList", std::vector({"Out"})}); attrs.insert({"GradList", std::vector({"x1"})}); attrs.insert({"OptimizeBlock", optimize_block}); + attrs.insert({"PrefetchBlock", prefetch_block}); listen_and_serv_op = f::OpRegistry::CreateOp("listen_and_serv", {{"X", {"x1"}}}, {}, attrs); LOG(INFO) << "selected port before run " << selected_port; @@ -148,7 +150,7 @@ TEST(SendRecvOp, CPUDense) { // local net f::Scope scope; p::CPUPlace place; - InitTensorsInScope(scope, place); + InitTensorsInScope(place, &scope); // create rpc client var scope.Var("RPC_CLIENT_VAR"); @@ -191,7 +193,7 @@ TEST(SendRecvOp, CPUSparse) { f::Scope scope; p::CPUPlace place; p::CPUDeviceContext ctx(place); - InitSelectedRowsInScope(scope, place); + InitSelectedRowsInScope(place, &scope); scope.Var("RPC_CLIENT_VAR"); f::AttributeMap attrs; selected_port = static_cast( diff --git a/paddle/fluid/operators/send_vars_op.cc b/paddle/fluid/operators/send_vars_op.cc index 2cbd9e2394800dc3b9c5be1163d16bbec435c533..56b3713d6af28d0787e114a672a503e86cbd85fd 100644 --- a/paddle/fluid/operators/send_vars_op.cc +++ b/paddle/fluid/operators/send_vars_op.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include +#include // NOLINT #include #include "paddle/fluid/framework/data_type.h" @@ -36,7 +36,7 @@ class SendVarsOp : public framework::OperatorBase { auto ins = Inputs("X"); std::vector epmap = Attr>("epmap"); - int sync_send = Attr("sync_sent"); + int sync_send = Attr("sync_send"); platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); auto& ctx = *pool.Get(place); diff --git a/paddle/fluid/operators/sgd_op.cc b/paddle/fluid/operators/sgd_op.cc index 074fa9e00f2ec531f324ff10113d95144687d500..06cb0550ad7d4ad0241a4f439ea9ac16d9714c38 100644 --- a/paddle/fluid/operators/sgd_op.cc +++ b/paddle/fluid/operators/sgd_op.cc @@ -35,8 +35,8 @@ class SGDOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1, "Learning rate should have 1 element"); auto param_dim = ctx->GetInputDim("Param"); - // TODO(qijun): check dimensions of Param and Grad at complie - // and run time. + // TODO(qijun): check dimensions of Param and Grad at compile + // and runtime. ctx->SetOutputDim("ParamOut", param_dim); } diff --git a/paddle/fluid/operators/split_ids_op.cc b/paddle/fluid/operators/split_ids_op.cc index a54f8a2878c8606e6b487552324d1e7dfa94b9b8..a53cbc8ac5199061dafdc7f4cf560b9e4fc577ab 100644 --- a/paddle/fluid/operators/split_ids_op.cc +++ b/paddle/fluid/operators/split_ids_op.cc @@ -48,11 +48,11 @@ class SplitIdsOp : public framework::OperatorWithKernel { PADDLE_ENFORCE(ctx->HasOutputs("Out"), "SplitIdsOp must has output Out."); auto ids_var_type = ctx->GetInputsVarType("Ids").front(); - PADDLE_ENFORCE_EQ(ids_var_type, framework::proto::VarType::LOD_TENSOR); - auto ids_dims = ctx->GetInputDim("Ids"); - PADDLE_ENFORCE_EQ(ids_dims.size(), 2); - PADDLE_ENFORCE_EQ(ids_dims[1], 1); + if (ids_var_type == framework::proto::VarType::LOD_TENSOR) { + PADDLE_ENFORCE_EQ(ids_dims.size(), 2); + PADDLE_ENFORCE_EQ(ids_dims[1], 1); + } } }; @@ -60,8 +60,9 @@ class SplitIdsOpInferVarType : public framework::VarTypeInference { public: void operator()(const framework::OpDesc &op_desc, framework::BlockDesc *block) const override { + auto *input_var = block->Var(op_desc.Input("Ids")[0]); for (auto &out_var : op_desc.Output("Out")) { - block->Var(out_var)->SetType(framework::proto::VarType::LOD_TENSOR); + block->Var(out_var)->SetType(input_var->GetType()); } } }; @@ -73,4 +74,5 @@ namespace ops = paddle::operators; REGISTER_OPERATOR(split_ids, ops::SplitIdsOp, ops::SplitIdsOpMaker, ops::SplitIdsOpInferVarType); REGISTER_OP_CPU_KERNEL( - split_ids, ops::SplitIdsOpKernel); + split_ids, ops::SplitIdsOpKernel, + ops::SplitIdsOpKernel); diff --git a/paddle/fluid/operators/split_ids_op.h b/paddle/fluid/operators/split_ids_op.h index d36ed398ebce661a62ca92696b0089b5289d5b1c..ba1e903dbb6daaa86b1b664322d100a800fd16b3 100644 --- a/paddle/fluid/operators/split_ids_op.h +++ b/paddle/fluid/operators/split_ids_op.h @@ -24,35 +24,63 @@ namespace operators { template class SplitIdsOpKernel : public framework::OpKernel { public: - void Compute(const framework::ExecutionContext& ctx) const override { + void Compute(const framework::ExecutionContext &ctx) const override { auto place = ctx.GetPlace(); if (!platform::is_cpu_place(place)) { PADDLE_THROW("SplitIds do not support GPU kernel"); } - auto& ids_dims = ctx.Input("Ids")->dims(); - const T* ids = ctx.Input("Ids")->data(); - auto outs = ctx.MultiOutput("Out"); - const size_t shard_num = outs.size(); + const auto *ids_var = ctx.InputVar("Ids"); + if (ids_var->IsType()) { + const auto &ids_dims = ctx.Input("Ids")->dims(); + const T *ids = ctx.Input("Ids")->data(); + auto outs = ctx.MultiOutput("Out"); + const size_t shard_num = outs.size(); - std::vector> out_ids; - out_ids.resize(outs.size()); + std::vector> out_ids; + out_ids.resize(outs.size()); - // split id by their shard_num. - for (int i = 0; i < ids_dims[0]; ++i) { - T id = ids[i]; - size_t shard_id = static_cast(id) % shard_num; - out_ids[shard_id].push_back(id); - } + // split id by their shard_num. + for (int i = 0; i < ids_dims[0]; ++i) { + T id = ids[i]; + size_t shard_id = static_cast(id) % shard_num; + out_ids[shard_id].push_back(id); + } + + // create tensor for each shard and send to parameter server + for (size_t i = 0; i < out_ids.size(); ++i) { + auto *shard_t = outs[i]; + std::vector ids = out_ids[i]; + auto *shard_data = shard_t->mutable_data( + framework::make_ddim({static_cast(ids.size()), 1}), place); + for (size_t i = 0; i < ids.size(); ++i) { + shard_data[i] = ids[i]; + } + } + } else if (ids_var->IsType()) { + const auto *ids_selected_rows = ctx.Input("Ids"); + auto &ids_dims = ids_selected_rows->value().dims(); + PADDLE_ENFORCE_EQ(ids_dims[0], ids_selected_rows->rows().size(), ""); + const T *ids = ids_selected_rows->value().data(); + const auto &ids_rows = ids_selected_rows->rows(); + auto outs = ctx.MultiOutput("Out"); + const size_t shard_num = outs.size(); + // get rows for outputs + for (auto &id : ids_rows) { + size_t shard_id = static_cast(id) % shard_num; + outs[shard_id]->mutable_rows()->push_back(id); + } - // create tensor for each shard and send to parameter server - for (size_t i = 0; i < out_ids.size(); ++i) { - auto* shard_t = outs[i]; - std::vector ids = out_ids[i]; - auto* shard_data = shard_t->mutable_data( - framework::make_ddim({static_cast(ids.size()), 1}), place); - for (size_t i = 0; i < ids.size(); ++i) { - shard_data[i] = ids[i]; + int64_t row_width = ids_dims[1]; + for (auto &out : outs) { + out->set_height(ids_selected_rows->height()); + framework::DDim ddim = framework::make_ddim( + {static_cast(out->rows().size()), row_width}); + T *output = out->mutable_value()->mutable_data(ddim, place); + for (size_t i = 0; i < ddim[0]; ++i) { + memcpy(output + i * row_width, ids + out->rows()[i] * row_width, + row_width * sizeof(T)); + } } } } diff --git a/paddle/fluid/operators/split_op.cc b/paddle/fluid/operators/split_op.cc index dffac772f11bee2fa3dcdf469a86adc57369b54d..e745509ec8c1f2ec305d7d4aabfdd43d847124b5 100644 --- a/paddle/fluid/operators/split_op.cc +++ b/paddle/fluid/operators/split_op.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/split_op.h" -#include "paddle/fluid/operators/net_op.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/sum_op.cc b/paddle/fluid/operators/sum_op.cc index 9061e137bd1c789d34665729c48c1c2ea9525c8e..108f26fafe7af76eaa613d77ed77748ee43ea234 100644 --- a/paddle/fluid/operators/sum_op.cc +++ b/paddle/fluid/operators/sum_op.cc @@ -10,9 +10,11 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/sum_op.h" + #include #include #include + #include "paddle/fluid/framework/var_type_inference.h" #include "paddle/fluid/operators/detail/safe_ref.h" @@ -37,7 +39,10 @@ class SumOp : public framework::OperatorWithKernel { auto x_dims = ctx->GetInputsDim("X"); size_t N = x_dims.size(); - PADDLE_ENFORCE_GT(N, 1, "Input tensors count should > 1."); + PADDLE_ENFORCE_GT(N, 0, "Input tensors count should > 0."); + if (N == 1) { + VLOG(3) << "Warning: sum have only one input, may waste memory"; + } framework::DDim in_dim({0}); for (auto& x_dim : x_dims) { diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 884289a7fda65f9713392ec459219b4c89271e73..4fef351c2118e43697606c90a616cd870e78cd77 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -2,13 +2,13 @@ if(WITH_PYTHON) if(WITH_AMD_GPU) hip_library(paddle_pybind SHARED SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc - DEPS pybind python backward proto_desc memory executor prune init profiler feed_fetch_method + DEPS pybind python proto_desc memory executor prune init profiler feed_fetch_method parallel_executor ${GLOB_OP_LIB}) else() cc_library(paddle_pybind SHARED SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc - DEPS pybind python backward proto_desc memory executor prune init profiler feed_fetch_method + DEPS pybind python proto_desc memory executor prune init profiler feed_fetch_method parallel_executor ${GLOB_OP_LIB}) if(NOT APPLE AND NOT ANDROID) diff --git a/paddle/fluid/pybind/protobuf.cc b/paddle/fluid/pybind/protobuf.cc index 2fe829036386086075a7f6ad0b9348a9e8c5e85a..93533e5c9d88a9113d4d3eacb01901a8c14b6324 100644 --- a/paddle/fluid/pybind/protobuf.cc +++ b/paddle/fluid/pybind/protobuf.cc @@ -18,7 +18,6 @@ limitations under the License. */ #include #include -#include "paddle/fluid/framework/backward.h" #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/program_desc.h" @@ -125,23 +124,6 @@ void BindProgramDesc(pybind11::module *m) { }) .def("append_block", &pd::ProgramDesc::AppendBlock, pybind11::return_value_policy::reference) - .def("append_backward", - [](pd::ProgramDesc &program_desc, const pd::VarDesc &target, - const std::unordered_set &no_grad_vars) { - pd::ParamGradInfoMap param_grad_map = - AppendBackward(program_desc, target, no_grad_vars); - std::unordered_map< - std::string, std::tuple> - retv; - for (auto it = param_grad_map.begin(); it != param_grad_map.end(); - ++it) { - const auto &grad_info = it->second; - retv[it->first] = std::make_tuple( - grad_info.name_, grad_info.block_idx_, grad_info.op_idx_); - } - return retv; - }) .def("block", &pd::ProgramDesc::MutableBlock, pybind11::return_value_policy::reference) .def("num_blocks", &pd::ProgramDesc::Size) diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index d559743a69584c96c9f543692191964c68c7b5e1..a1e8ff6399f0812773a7bb753c90e4400b1763d9 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -20,9 +20,6 @@ limitations under the License. */ #include #include -#include "paddle/fluid/pybind/protobuf.h" - -#include "paddle/fluid/framework/backward.h" #include "paddle/fluid/framework/channel.h" #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/feed_fetch_method.h" @@ -31,18 +28,18 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_rank_table.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor_array.h" +#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/parallel_executor.h" #include "paddle/fluid/framework/prune.h" #include "paddle/fluid/framework/reader.h" #include "paddle/fluid/framework/selected_rows.h" -#include "paddle/fluid/operators/cond_op.h" -#include "paddle/fluid/operators/net_op.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/pybind/const_value.h" #include "paddle/fluid/pybind/exception.h" -#include "paddle/fluid/pybind/pybind.h" +#include "paddle/fluid/pybind/protobuf.h" +#include "paddle/fluid/pybind/pybind.h" // NOLINT #include "paddle/fluid/pybind/recordio.h" #include "paddle/fluid/pybind/tensor_py.h" @@ -239,11 +236,6 @@ All parameter, weight, gradient are variables in Paddle. }, py::return_value_policy::reference) #endif - .def("get_net", - [](Variable &self) -> operators::NetOp * { - return self.GetMutable(); - }, - py::return_value_policy::reference) .def("get_reader", [](Variable &self) -> framework::ReaderHolder * { PADDLE_ENFORCE(self.IsType()); @@ -388,11 +380,6 @@ All parameter, weight, gradient are variables in Paddle. desc.InitializationErrorString()); return OpRegistry::CreateOp(desc); }) - .def("backward", - [](const OperatorBase &forwardOp, - const std::unordered_set &no_grad_vars) { - return Backward(forwardOp, no_grad_vars).release(); - }) .def("run", [](OperatorBase &self, const Scope &scope, const platform::CPUPlace &place) { self.Run(scope, place); }) @@ -420,42 +407,6 @@ All parameter, weight, gradient are variables in Paddle. [](const OperatorBase &op) { return op.OutputVars(false); }) .def("support_gpu", &OperatorBase::SupportGPU); - py::class_(m, "Net") - .def_static("create", - []() -> operators::NetOp * { - auto *retv = new operators::NetOp; - retv->SetType("plain_net"); - return retv; - }) - .def("append_op", [](operators::NetOp &self, - const OperatorBase &op) { self.AppendOp(op); }) - .def("complete_add_op", &operators::NetOp::CompleteAddOp) - .def("complete_add_op", [](std::shared_ptr &self) { - self->CompleteAddOp(); - }); - - // cond_op - py::class_(m, "CondOp") - .def_static("create", - [](py::bytes protobin) -> operators::CondOp * { - proto::OpDesc desc; - PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), - "Cannot parse user input to OpDesc"); - PADDLE_ENFORCE(desc.IsInitialized(), - "User OpDesc is not initialized, reason %s", - desc.InitializationErrorString()); - auto cond_op = OpRegistry::CreateOp(desc); - return static_cast(cond_op.release()); - }) - .def("set_truenet", - [](operators::CondOp &self, const operators::NetOp &net) -> void { - self.set_truenet(net.Clone()); - }) - .def("set_falsenet", - [](operators::CondOp &self, const operators::NetOp &net) -> void { - self.set_falsenet(net.Clone()); - }); - py::class_(m, "Executor") .def(py::init()) .def("run", diff --git a/paddle/fluid/recordio/chunk.cc b/paddle/fluid/recordio/chunk.cc index e7ebbba452c5c37113f0962e459da65c66b70873..82d9aa601cf450b8f90573d6c582bb12ced7a48a 100644 --- a/paddle/fluid/recordio/chunk.cc +++ b/paddle/fluid/recordio/chunk.cc @@ -14,13 +14,13 @@ #include "paddle/fluid/recordio/chunk.h" +#include #include #include #include #include "paddle/fluid/platform/enforce.h" -#include "snappy_stream/include/snappystream.hpp" -#include "zlib/include/zlib.h" +#include "snappystream.hpp" namespace paddle { namespace recordio { diff --git a/paddle/fluid/recordio/header.cc b/paddle/fluid/recordio/header.cc index ed09d58f6a3e2dba50bf4407c0463480575b248e..c4822329a43a79adc81f0b0cf145b22661ac6f50 100644 --- a/paddle/fluid/recordio/header.cc +++ b/paddle/fluid/recordio/header.cc @@ -13,6 +13,9 @@ // limitations under the License. #include "paddle/fluid/recordio/header.h" + +#include + #include "paddle/fluid/platform/enforce.h" namespace paddle { diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 4885b74e6c6644704cff01dbf49975d6e87ce0c4..be1565ab533037d4bc72b6d2834c48b04638c297 100755 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -231,7 +231,7 @@ function gen_fluid_inference_lib() { Deploying fluid inference library ... ======================================== EOF - make inference_lib_dist + make -j `nproc` inference_lib_dist fi } diff --git a/python/paddle/fluid/distribute_transpiler.py b/python/paddle/fluid/distribute_transpiler.py index e18ace844e203be5b1b37040432bdad565a0734c..b0522b49f44d8ed0c8c7e3148e24f312fbdd1123 100644 --- a/python/paddle/fluid/distribute_transpiler.py +++ b/python/paddle/fluid/distribute_transpiler.py @@ -13,14 +13,17 @@ # limitations under the License. from __future__ import print_function -import framework -from framework import Program, default_main_program, default_startup_program, Parameter, Variable -import optimizer -from layer_helper import LayerHelper -import distributed_splitter as splitter + import math + +import distributed_splitter as splitter +import framework +from framework import Program, default_main_program, Variable from . import core -import debuger + +LOOKUP_TABLE_TYPE = "lookup_table" +LOOKUP_TABLE_GRAD_TYPE = "lookup_table_grad" +RPC_CLIENT_VAR_NAME = "RPC_CLIENT_VAR" class VarBlock: @@ -35,9 +38,9 @@ class VarBlock: class UnionFind(object): - """ Union-find data struct. + """ Union-find data structure. - Union-find is a data struct that keeps track of a set of elements partitioned + Union-find is a data structure that keeps track of a set of elements partitioned into a number of disjoint (non-overlapping) subsets. Reference: @@ -185,19 +188,66 @@ class DistributeTranspiler: assert (callable(split_method)) if program is None: program = default_main_program() - self.program = program - self.trainers = trainers + self.origin_program = program + self.trainer_num = trainers self.optimize_ops = optimize_ops # TODO(typhoonzero): currently trainer_id is fetched from cluster system # like Kubernetes, we should port this to use etcd later when developing # fluid distributed training with fault-tolerance. self.trainer_id = trainer_id pserver_endpoints = pservers.split(",") + self.pserver_endpoints = pserver_endpoints + + # process lookup_table_op + # 1. check all lookup_table_op is distributed + # 2. check all lookup_table_op share the same table. + distributed_lookup_table_ops = [] + # support only one distributed_lookup_table now + self.table_name = None + for op in program.global_block().ops: + if op.type == LOOKUP_TABLE_TYPE: + if op.attrs['is_distributed'] is True: + if self.table_name is None: + self.table_name = op.input("W")[0] + if self.table_name != op.input("W")[0]: + raise RuntimeError("all distributed lookup_table_ops" + " should have only one table") + distributed_lookup_table_ops.append(op) + else: + if self.table_name is not None: + assert op.input("W")[0] != self.table_name + + self.has_distributed_lookup_table = len( + distributed_lookup_table_ops) > 0 # step1: For large parameters and gradients, split them into smaller # blocks. param_list = [pg[0] for pg in params_grads] grad_list = [pg[1] for pg in params_grads] + + if self.has_distributed_lookup_table: + param_list = [ + param for param in param_list if param.name != self.table_name + ] + grad_list = [ + grad for grad in grad_list + if grad.name != framework.grad_var_name(self.table_name) + ] + self.table_param_grad = [ + param_grad for param_grad in params_grads + if param_grad[0].name == self.table_name + ][0] + table_grad_var = self.table_param_grad[1] + self.table_grad_list = [ + program.global_block().create_var( + name="%s.trainer_%d.pserver_%d" % + (table_grad_var.name, trainer_id, index), + type=table_grad_var.type, + shape=table_grad_var.shape, + dtype=table_grad_var.dtype) + for index in range(len(self.pserver_endpoints)) + ] + grad_blocks = split_dense_variable(grad_list, len(pserver_endpoints)) param_blocks = split_dense_variable(param_list, len(pserver_endpoints)) # step2: Create new vars for the parameters and gradients blocks and @@ -229,7 +279,7 @@ class DistributeTranspiler: self.param_grad_ep_mapping[ep]["grads"].append(grad) rpc_client_var = program.global_block().create_var( - name="RPC_CLIENT_VAR", + name=RPC_CLIENT_VAR_NAME, persistable=True, type=core.VarDesc.VarType.RAW) @@ -252,13 +302,19 @@ class DistributeTranspiler: outputs={"Out": [orig_param]}, attrs={"axis": 0}) + if self.has_distributed_lookup_table: + self._replace_lookup_table_op_with_prefetch(program, rpc_client_var, + eplist) + self._split_table_grad_and_add_send_vars(program, rpc_client_var, + pserver_endpoints) + def get_trainer_program(self): # remove optimize ops and add a send op to main_program - self.program.global_block().delete_ops(self.optimize_ops) - self.program.sync_with_cpp() + self.origin_program.global_block().delete_ops(self.optimize_ops) + self.origin_program.sync_with_cpp() # FIXME(typhoonzero): serialize once will fix error occurs when clone. - self.program.__str__() - return self.program + self.origin_program.__str__() + return self.origin_program def get_pserver_program(self, endpoint): """ @@ -294,8 +350,8 @@ class DistributeTranspiler: type=v.type, dtype=v.dtype, shape=v.shape) - if self.trainers > 1: - for trainer_id in xrange(self.trainers): + if self.trainer_num > 1: + for trainer_id in xrange(self.trainer_num): var = pserver_program.global_block().create_var( name="%s.trainer_%d" % (orig_var_name, trainer_id), persistable=False, @@ -309,7 +365,7 @@ class DistributeTranspiler: # step3 optimize_block = pserver_program.create_block(0) # step 4 - # Create a union-find data struct from optimize ops, + # Create a union-find data structure from optimize ops, # If two ops are connected, we could add these two ops # into one set. ufind = self._create_ufind(self.optimize_ops) @@ -384,6 +440,23 @@ class DistributeTranspiler: # __append_optimize_op__(glb_op, optimize_block) # break + # process distributed lookup_table + prefetch_block = None + if self.has_distributed_lookup_table: + pserver_index = self.pserver_endpoints.index(endpoint) + self._create_table_optimize_block(pserver_index, pserver_program, + append_block) + prefetch_block = self._create_prefetch_block( + pserver_index, pserver_program, optimize_block) + + # NOTE: if has_distributed_lookup_table is False, then prefetch_block will + # not be executed, so it's safe to use optimize_block to hold the place + if self.has_distributed_lookup_table: + assert prefetch_block is not None + else: + assert prefetch_block is None + prefetch_block = pserver_program.global_block() + # step5 append the listen_and_serv op pserver_program.global_block().append_op( type="listen_and_serv", @@ -392,8 +465,10 @@ class DistributeTranspiler: attrs={ "OptimizeBlock": optimize_block, "endpoint": endpoint, - "Fanin": self.trainers + "Fanin": self.trainer_num, + "PrefetchBlock": prefetch_block }) + pserver_program.sync_with_cpp() return pserver_program @@ -451,6 +526,197 @@ class DistributeTranspiler: attrs=op.attrs) return s_prog + # transpiler function for dis lookup_table + def _replace_lookup_table_op_with_prefetch(self, program, rpc_client_var, + eplist): + # 1. replace lookup_table_op with split_ids_op -> prefetch_op -> sum_op + self.prefetch_input_vars = None + self.prefetch_output_vars = None + + continue_search_lookup_table_op = True + while continue_search_lookup_table_op: + continue_search_lookup_table_op = False + all_ops = program.global_block().ops + for op in all_ops: + if op.type == LOOKUP_TABLE_TYPE: + continue_search_lookup_table_op = True + + op_index = list(all_ops).index(op) + ids_name = op.input("Ids") + out_name = op.output("Out") + + if self.prefetch_input_vars is None: + ids_var = program.global_block().vars[ids_name[0]] + self.prefetch_input_vars = self.create_splited_vars( + source_var=ids_var, + block=program.global_block(), + tag="_prefetch_in_") + if self.prefetch_output_vars is None: + out_var = program.global_block().vars[out_name[0]] + self.prefetch_output_vars = self.create_splited_vars( + source_var=out_var, + block=program.global_block(), + tag="_prefetch_out_") + + # insert split_ids_op + program.global_block().insert_op( + index=op_index, + type="split_ids", + inputs={ + 'Ids': [ + program.global_block().vars[varname] + for varname in ids_name + ] + }, + outputs={"Out": self.prefetch_input_vars}) + + # insert prefetch_op + program.global_block().insert_op( + index=op_index + 1, + type="prefetch", + inputs={'X': self.prefetch_input_vars}, + outputs={ + "Out": self.prefetch_output_vars, + "RPCClient": rpc_client_var + }, + attrs={"epmap": eplist}) + + # insert concat_op + program.global_block().insert_op( + index=op_index + 2, + type="concat", + inputs={'X': self.prefetch_output_vars}, + outputs={ + "Out": [ + program.global_block().vars[varname] + for varname in out_name + ] + }, + attrs={"axis": 0}) + + # delete lookup_table_op + program.global_block().delete_ops([op]) + program.sync_with_cpp() + # break for loop + break + + def _split_table_grad_and_add_send_vars(self, program, rpc_client_var, + pserver_endpoints): + # 2. add split_ids_op and send_vars_op to send gradient to pservers + # there should only be one table_name + all_ops = program.global_block().ops + table_grad_name = framework.grad_var_name(self.table_name) + for op in all_ops: + if table_grad_name in op.output_arg_names: + op_index = list(all_ops).index(op) + # insert split_ids_op + program.global_block().insert_op( + index=op_index + 1, + type="split_ids", + inputs={ + 'Ids': [program.global_block().vars[table_grad_name]] + }, + outputs={"Out": self.table_grad_list}) + program.global_block().insert_op( + index=op_index + 2, + type="send_vars", + inputs={'X': self.table_grad_list}, + outputs={"RPCClient": rpc_client_var}, + attrs={"sync_send": True, + "epmap": pserver_endpoints}) + break + + def _create_prefetch_block(self, pserver_index, pserver_program, + optimize_block): + # STEP: create prefetch block + table_var = pserver_program.global_block().vars[self.table_name] + prefetch_block = pserver_program.create_block(optimize_block.idx) + trainer_ids = self.prefetch_input_vars[pserver_index] + pserver_ids = pserver_program.global_block().create_var( + name=trainer_ids.name, + type=trainer_ids.type, + shape=trainer_ids.shape, + dtype=trainer_ids.dtype) + trainer_out = self.prefetch_output_vars[pserver_index] + pserver_out = pserver_program.global_block().create_var( + name=trainer_out.name, + type=trainer_out.type, + shape=trainer_out.shape, + dtype=trainer_out.dtype) + prefetch_block.append_op( + type=LOOKUP_TABLE_TYPE, + inputs={'Ids': pserver_ids, + "W": table_var}, + outputs={"Out": pserver_out}, + attrs={ + "is_sparse": True, # has no effect on lookup_table op + "is_distributed": True, + "padding_idx": -1 + }) + return prefetch_block + + def _create_table_optimize_block(self, pserver_index, pserver_program, + append_block): + def _clone_var(block, var, persistable=True): + assert isinstance(var, Variable) + return block.create_var( + name=var.name, + shape=var.shape, + dtype=var.dtype, + type=var.type, + persistable=persistable) + + # STEP: create table optimize block + # create table param and grad var in pserver program + param_var = _clone_var( + pserver_program.global_block(), + self.origin_program.global_block().vars[self.table_name]) + grad_var = _clone_var( + pserver_program.global_block(), + self.origin_program.global_block().vars[framework.grad_var_name( + self.table_name)], + persistable=False) + + # create grad vars in pserver program + table_grad_var = self.table_param_grad[1] + table_grad_list = [ + pserver_program.global_block().create_var( + name="%s.trainer_%d.pserver_%d" % + (table_grad_var.name, index, pserver_index), + type=table_grad_var.type, + shape=table_grad_var.shape, + dtype=table_grad_var.dtype) for index in range(self.trainer_num) + ] + + # create table optimize block in pserver program + table_opt_op = [ + op for op in self.optimize_ops + if op.input("Param")[0] == self.table_name + ][0] + table_opt_block = pserver_program.create_block(append_block.idx) + # only support sgd now + assert table_opt_op.type == "sgd" + + # append sum op for table_grad_list + table_opt_block.append_op( + type="sum", + inputs={"X": table_grad_list}, + outputs={"Out": [grad_var]}) + + lr_var = pserver_program.global_block().vars[table_opt_op.input( + "LearningRate")[0]] + inputs = { + "Param": [param_var], + "Grad": [grad_var], + "LearningRate": [lr_var] + } + outputs = {"ParamOut": [param_var]} + table_opt_block.append_op( + type=table_opt_op.type, + inputs=inputs, + outputs=outputs, + attrs=table_opt_op.attrs) + # ====================== private transpiler functions ===================== def _create_vars_from_blocklist(self, program, @@ -512,7 +778,17 @@ class DistributeTranspiler: program.global_block().sync_with_cpp() return var_mapping - def _clone_var(self, block, var): + def create_splited_vars(self, source_var, block, tag): + return [ + block.create_var( + name=str(source_var.name + tag + str(index)), + type=source_var.type, + shape=source_var.shape, + dtype=source_var.dtype) + for index in range(len(self.pserver_endpoints)) + ] + + def _clone_var(self, block, var, persistable=True): assert isinstance(var, Variable) return block.create_var( name=var.name, @@ -520,12 +796,12 @@ class DistributeTranspiler: dtype=var.dtype, type=var.type, lod_level=var.lod_level, - persistable=True) + persistable=persistable) def _append_split_op(self, program, gradblocks): # Split variables that need to be split and append respective ops add_suffix = False - if self.trainers > 1: + if self.trainer_num > 1: add_suffix = True var_mapping = self._create_vars_from_blocklist( program, gradblocks, add_trainer_suffix=add_suffix) @@ -616,9 +892,9 @@ class DistributeTranspiler: return merged_var = \ pserver_block.vars[self._orig_varname(grad_block.name)] - if self.trainers > 1: + if self.trainer_num > 1: vars2merge = [] - for i in xrange(self.trainers): + for i in xrange(self.trainer_num): per_trainer_name = "%s.trainer_%d" % \ (self._orig_varname(grad_block.name), i) vars2merge.append(pserver_block.vars[per_trainer_name]) @@ -633,7 +909,7 @@ class DistributeTranspiler: type="scale", inputs={"X": merged_var}, outputs={"Out": merged_var}, - attrs={"scale": 1.0 / float(self.trainers)}) + attrs={"scale": 1.0 / float(self.trainer_num)}) new_inputs[key] = merged_var elif key == "Param": # param is already created on global program @@ -669,7 +945,7 @@ class DistributeTranspiler: new_shape = None if key in ["Param", "Grad", "LearningRate"]: continue - var = self.program.global_block().vars[opt_op.input(key)[0]] + var = self.origin_program.global_block().vars[opt_op.input(key)[0]] # update accumulator variable shape param_shape = new_inputs["Param"].shape new_shape = self._get_optimizer_input_shape(opt_op.type, key, @@ -682,8 +958,8 @@ class DistributeTranspiler: new_inputs[key] = tmpvar # change output's ParamOut variable - outputs = self._get_output_map_from_op(self.program.global_block().vars, - opt_op) + outputs = self._get_output_map_from_op( + self.origin_program.global_block().vars, opt_op) outputs["ParamOut"] = new_inputs["Param"] optimize_block.append_op( @@ -695,8 +971,8 @@ class DistributeTranspiler: def _append_pserver_non_opt_ops(self, optimize_block, opt_op): program = optimize_block.program # Append the ops for parameters that do not need to be optimized/updated - inputs = self._get_input_map_from_op(self.program.global_block().vars, - opt_op) + inputs = self._get_input_map_from_op( + self.origin_program.global_block().vars, opt_op) for varlist in inputs.itervalues(): if not isinstance(varlist, list): varlist = [varlist] @@ -709,8 +985,8 @@ class DistributeTranspiler: dtype=var.dtype, shape=var.shape) - outputs = self._get_output_map_from_op(self.program.global_block().vars, - opt_op) + outputs = self._get_output_map_from_op( + self.origin_program.global_block().vars, opt_op) for varlist in outputs.itervalues(): if not isinstance(varlist, list): @@ -783,7 +1059,6 @@ class DistributeTranspiler: if same_or_split_var(n, param) and n != param: return True return False - return False def _get_input_map_from_op(self, varmap, op): """Returns a dict from op input name to the vars in varmap.""" @@ -821,7 +1096,7 @@ class DistributeTranspiler: find_ops = [] # find ops which output is lr var - block = self.program.global_block() + block = self.origin_program.global_block() for op in block.ops: if set(op.output_arg_names) & lr_vars: find_ops.append(op) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 793421a22fbf6f3c25ec6a9bf8359f4e71e905de..4b841ef31dcb67ab660475cf6e231fd8a4ae83d6 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -1119,24 +1119,6 @@ class Program(object): def current_block(self): return self.blocks[self.current_block_idx] - def append_backward(self, target, no_grad_set=None): - """ - return map(param_name -> (grad_name, block_index, op_index)) - """ - assert isinstance(target, Variable) - if no_grad_set is None: - no_grad_set = set() - try: - param_to_grad_info = self.desc.append_backward(target.desc, - no_grad_set) - except Exception as e: - raise core.EnforceNotMet( - str(e) + "\nCurrent protobuf is\n{0}".format( - self.to_string(False))) - - self.sync_with_cpp() - return param_to_grad_info - def create_block(self, parent_idx=None): new_block_idx = len(self.blocks) parent = self.current_block() if parent_idx is None else self.block( @@ -1201,6 +1183,8 @@ class Parameter(Variable): self.gradient_clip_attr = kwargs.get('gradient_clip_attr', None) + self.do_model_average = kwargs.get('do_model_average', None) + def __str__(self): return self.to_string(True) @@ -1221,7 +1205,7 @@ class Parameter(Variable): if with_details: res_str = Variable.to_string(self, throw_on_error, True) additional_attr = ("trainable", "optimize_attr", "regularizer", - "gradient_clip_attr") + "gradient_clip_attr", "do_model_average") for attr_name in additional_attr: res_str += "%s: %s\n" % (attr_name, str(getattr(self, attr_name))) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index d2e7d58524bfb11627b6acb36ef873c41b348f0f..5c2c2dd7abebf8960d68b4c4dfd746a4e27acd03 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -218,6 +218,7 @@ def fc(input, def embedding(input, size, is_sparse=False, + is_distributed=False, padding_idx=None, param_attr=None, dtype='float32'): @@ -268,8 +269,11 @@ def embedding(input, inputs={'Ids': input, 'W': w}, outputs={'Out': tmp}, - attrs={'is_sparse': is_sparse, - 'padding_idx': padding_idx}) + attrs={ + 'is_sparse': is_sparse, + 'is_distributed': is_distributed, + 'padding_idx': padding_idx + }) return tmp @@ -1516,7 +1520,8 @@ def batch_norm(input, in_place=False, name=None, moving_mean_name=None, - moving_variance_name=None): + moving_variance_name=None, + do_model_average_for_mean_and_var=False): """ This function helps create an operator to implement the BatchNorm layer using the configurations from the input parameters. @@ -1547,7 +1552,10 @@ def batch_norm(input, mean = helper.create_parameter( attr=ParamAttr( - name=moving_mean_name, initializer=Constant(0.0), trainable=False), + name=moving_mean_name, + initializer=Constant(0.0), + trainable=False, + do_model_average=do_model_average_for_mean_and_var), shape=param_shape, dtype=input.dtype) mean.stop_gradient = True @@ -1556,7 +1564,8 @@ def batch_norm(input, attr=ParamAttr( name=moving_variance_name, initializer=Constant(1.0), - trainable=False), + trainable=False, + do_model_average=do_model_average_for_mean_and_var), shape=param_shape, dtype=input.dtype) variance.stop_gradient = True @@ -3374,14 +3383,14 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None): Here are some examples to explain it. 1. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape - is [6, 8], the reshape operator will transform x into a 2-D tensor with + is [6, 8], the reshape operator will transform x into a 2-D tensor with shape [6, 8] and leaving x's data unchanged. 2. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape specified is [2, 3, -1, 2], the reshape operator will transform x into a 4-D tensor with shape [2, 3, 4, 2] and leaving x's data unchanged. In this - case, one dimension of the target shape is set to -1, the value of this - dimension is inferred from the total element number of x and remaining + case, one dimension of the target shape is set to -1, the value of this + dimension is inferred from the total element number of x and remaining dimensions. 3. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape @@ -3615,7 +3624,7 @@ def lrn(input, n=5, k=1.0, alpha=1e-4, beta=0.75, name=None): def pad(x, paddings, pad_value=0., name=None): """ Pads a tensor with a constant value given by :attr:`pad_value`, and the - padded width is specified by :attr:`paddings`. + padded width is specified by :attr:`paddings`. Specifically, the number of values padded before the contents of :attr:`x` in dimension :attr:`i` is indicated by :attr:`paddings[i]`, and the number @@ -3643,7 +3652,7 @@ def pad(x, paddings, pad_value=0., name=None): x (Variable): The input tensor variable. paddings (list): A list of integers. Its elements specify the padded width before and after for each dimension in turn. - The length of :attr:paddings must be + The length of :attr:paddings must be :math:`rank(x) \\times 2`. pad_value (float): The constant value used to pad. name(str|None): A name for this layer(optional). If set None, the layer diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 180575c35dc6e115e11cccf9fff9fb2d3cd7e9a6..36503cac6d5391821b977d90e6b77c4df7e3b564 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import re from collections import defaultdict from paddle.fluid.framework import Program import framework @@ -818,8 +818,8 @@ class ModelAverage(Optimizer): min_average_window, max_average_window and current update times. Args: - params_grads: A list of parameter-grad variable pairs. average_window_rate: The rate of average window. + params_grads: A list of parameter-grad variable pairs. min_average_window: The minimum size of average window. max_average_window: The maximum size of average window. @@ -840,8 +840,8 @@ class ModelAverage(Optimizer): """ def __init__(self, - params_grads, average_window_rate, + params_grads=None, min_average_window=10000, max_average_window=10000, **kwargs): @@ -849,24 +849,37 @@ class ModelAverage(Optimizer): self.average_window = average_window_rate self.min_average_window = min_average_window self.max_average_window = max_average_window - self.params_grads = params_grads + + self.params_grads = [] if params_grads is None else params_grads + params = {} + for param, grad in self.params_grads: + if param.do_model_average != False: + params[param.name] = (param, grad) + for param in framework.default_main_program().global_block( + ).all_parameters(): + if param.name not in params and param.do_model_average != False: + grad = param.block.create_var( + name=unique_name.generate(".".join([param.name, 'tmp'])), + dtype=param.dtype, + persistable=False, + stop_gradient=True) + params[param.name] = (param, grad) + self.params_grads = params.values() + for param, grad in self.params_grads: - if grad is not None: - self._append_average_accumulate_op(param) + self._append_average_accumulate_op(param) self.apply_program = Program() block = self.apply_program.global_block() with program_guard(main_program=self.apply_program): for param_grad in self.params_grads: - if param_grad[1] is not None: - self._add_average_apply_op(block, param_grad) + self._add_average_apply_op(block, param_grad) self.restore_program = Program() block = self.restore_program.global_block() with program_guard(main_program=self.restore_program): for param_grad in self.params_grads: - if param_grad[1] is not None: - self._add_average_restore_op(block, param_grad) + self._add_average_restore_op(block, param_grad) def _add_average_apply_op(self, block, param_grad): param = block.clone_variable(param_grad[0]) diff --git a/python/paddle/fluid/param_attr.py b/python/paddle/fluid/param_attr.py index 255cd2104325afa31449cbd3875499a7c5d7f572..1c6970441bccdc1c1221503256c30c83502bd123 100644 --- a/python/paddle/fluid/param_attr.py +++ b/python/paddle/fluid/param_attr.py @@ -28,13 +28,15 @@ class ParamAttr(object): learning_rate=1.0, regularizer=None, trainable=True, - gradient_clip=None): + gradient_clip=None, + do_model_average=None): self.name = name self.initializer = initializer self.learning_rate = learning_rate self.regularizer = regularizer self.trainable = trainable self.gradient_clip = gradient_clip + self.model_average = do_model_average def set_default_initializer(self, initializer): if initializer is None: @@ -80,7 +82,8 @@ class ParamAttr(object): }, 'regularizer': self.regularizer, 'trainable': self.trainable, - 'gradient_clip_attr': self.gradient_clip + 'gradient_clip_attr': self.gradient_clip, + 'model_average': self.model_average } if with_initializer: kwargs['initializer'] = self.initializer @@ -90,7 +93,7 @@ class ParamAttr(object): class WeightNormParamAttr(ParamAttr): """ Used for weight normalization. Any field in ParamAttr can also be set here. - Besides, an extra field dim can be set to indicate the dimension except + Besides, an extra field dim can be set to indicate the dimension except which to normalize. """ # List to record the parameters reparameterized by weight normalization. diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py index 10aa63e18a6eeaa44e5b12f7532998dca2bc5e9f..7ecf9a1459ffc9740ae8c12df3902163ee689f59 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py @@ -14,23 +14,13 @@ import unittest import numpy as np -from op_test import OpTest import paddle.fluid.core as core from paddle.fluid.op import Operator +import paddle.fluid as fluid +from op_test import OpTest from paddle.fluid.framework import grad_var_name -def get_backward_op(scope, op, no_grad_set): - backward_op = core.Operator.backward(op, no_grad_set) - for input in backward_op.input_vars(): - var = scope.var(input) - var.get_tensor() - for output in backward_op.output_vars(): - var = scope.var(output) - var.get_tensor() - return backward_op - - def _reference_testing(x, scale, offset, mean, var, epsilon, data_format): x_shape = x.shape if len(x_shape) == 2: @@ -64,11 +54,6 @@ def _reference_testing(x, scale, offset, mean, var, epsilon, data_format): def _reference_training(x, scale, offset, epsilon, data_format): x_shape = x.shape - if len(x_shape) == 2: - if data_format == "NCHW": - x = np.reshape(x, (x.shape[0], x.shape[1], 1, 1)) - else: - x = np.reshape(x, (x.shape[0], 1, 1, x.shape[1])) if data_format == "NCHW": n, c, h, w = x.shape @@ -88,8 +73,6 @@ def _reference_training(x, scale, offset, epsilon, data_format): offset_tile = np.reshape(offset, (1, c, 1, 1)) offset_tile = np.reshape(offset_tile, (1, c, 1, 1)) y = normalized * scale_tile + offset_tile - if len(x_shape) == 2: - y = np.reshape(y, (y.shape[0], y.shape[1])) return y, mean, var elif data_format == "NHWC": x_square = x * x @@ -100,59 +83,42 @@ def _reference_training(x, scale, offset, epsilon, data_format): var = x_square_sum / element_count - mean * mean normalized = (x - mean) / np.sqrt(var + epsilon) y = normalized * scale + offset - if len(x_shape) == 2: - y = np.reshape(y, x_shape) return y, mean, var else: raise ValueError("Unknown data order.") -def _reference_grad(x, grad_y, scale, mean, var, epsilon, data_format): +def _reference_grad(x, y_grad, scale, mean, var, epsilon, data_format): # Use the following formulas to calculate gradients: # grad_scale = # sum(grad_y * (x - mean)) * rsqrt(var + epsilon) # # grad_offset = sum(output_y) # - # grad_x = + # x_grad = # 1/N * scale * rsqrt(var + epsilon) * (N * grad_y - sum(grad_y) - # (x - mean) * sum(grad_y * (x - mean)) / (var + epsilon)) # transfer from (N, C, H, W) to (N, H, W, C) to simplify computation - x_shape = x.shape - - if len(x_shape) == 2: - if data_format == "NCHW": - x = np.reshape(x, (x.shape[0], x.shape[1], 1, 1)) - grad_y = np.reshape(grad_y, - (grad_y.shape[0], grad_y.shape[1], 1, 1)) - else: - x = np.reshape(x, (x.shape[0], 1, 1, x.shape[1])) - grad_y = np.reshape(grad_y, - (grad_y.shape[0], 1, 1, grad_y.shape[1])) - if data_format == "NCHW": x = np.transpose(x, (0, 2, 3, 1)) - grad_y = np.transpose(grad_y, (0, 2, 3, 1)) + y_grad = np.transpose(y_grad, (0, 2, 3, 1)) - # raise ValueError("data_format must be NHWC, got %s." % data_format) - grad_x = scale * (grad_y - np.mean( - grad_y, axis=(0, 1, 2)) - (x - mean) * np.mean( - grad_y * (x - mean), axis=(0, 1, 2)) / + x_grad = scale * (y_grad - np.mean( + y_grad, axis=(0, 1, 2)) - (x - mean) * np.mean( + y_grad * (x - mean), axis=(0, 1, 2)) / (var + epsilon)) / np.sqrt(var + epsilon) - grad_scale = np.sum(grad_y * (x - mean) / np.sqrt(var + epsilon), + grad_scale = np.sum(y_grad * (x - mean) / np.sqrt(var + epsilon), axis=(0, 1, 2)) - grad_offset = np.sum(grad_y, axis=(0, 1, 2)) + grad_offset = np.sum(y_grad, axis=(0, 1, 2)) # transfer back to N, C, H, W if data_format == "NCHW": - grad_x = np.transpose(grad_x, (0, 3, 1, 2)) + x_grad = np.transpose(x_grad, (0, 3, 1, 2)) x = np.transpose(x, (0, 3, 1, 2)) - grad_y = np.transpose(grad_y, (0, 3, 1, 2)) + y_grad = np.transpose(y_grad, (0, 3, 1, 2)) - if len(x_shape) == 2: - grad_x = np.reshape(grad_x, x_shape) - return grad_x, grad_scale, grad_offset + return x_grad, grad_scale, grad_offset def create_or_get_tensor(scope, var_name, var, place): @@ -186,7 +152,7 @@ def set_output_grad(scope, outputs, place, feed_dict=None): __set_tensor__(output, data) -class TestBatchNormOpInference(OpTest): +class TestBatchNormOpInference(unittest.TestCase): def setUp(self): self.dtype = np.float32 @@ -304,231 +270,121 @@ class TestFP16BatchNormOpInference(TestBatchNormOpInference): self.check_with_place(place, data_format, self.dtype, [2, 3]) -class TestBatchNormOpTraining(OpTest): +class TestBatchNormOpTraining(unittest.TestCase): def __assert_close(self, tensor, np_array, msg, atol=1e-4): + if not np.allclose(np.array(tensor), np_array, atol=atol): + import pdb + pdb.set_trace() self.assertTrue(np.allclose(np.array(tensor), np_array, atol=atol), msg) - def test_python_testing(self): - data_format = "NHWC" - epsilon = 0.00001 - - n, h, w, c = 2, 3, 4, 5 - x_shape = [n, h, w, c] - scale_shape = [c] - - x_val = np.random.random_sample(x_shape).astype(np.float32) - scale_val = np.random.random_sample(scale_shape).astype(np.float32) - bias_val = np.random.random_sample(scale_shape).astype(np.float32) - - mean = np.zeros(scale_shape).astype(np.float32) - variance = np.ones(scale_shape).astype(np.float32) - - y_out = _reference_testing(x_val, scale_val, bias_val, mean, variance, - epsilon, "NHWC") - - # running N, C, H, W case - # should produce the same results - x_shape2 = [n, c, h, w] - x_val2 = np.transpose(x_val, (0, 3, 1, 2)) - y_out2 = _reference_testing(x_val2, scale_val, bias_val, mean, variance, - epsilon, "NCHW") - - # transfer (N, C, H, W) back to (N, H, W, C) - y_out2_trans = np.transpose(y_out2, (0, 2, 3, 1)) - self.__assert_close(y_out, y_out2_trans, "inference output") - print 'python: NHWC, NCHW, inference checking passed' - - def test_python_training(self): - data_format = "NHWC" - epsilon = 0.00001 - momentum = 0.9 - - # N, H, W, C: 2, 3, 4, 2 - n, h, w, c = 2, 3, 4, 5 - x_shape = [n, h, w, c] - scale_shape = [c] - - x_val = np.random.random_sample(x_shape).astype(np.float32) - scale_val = np.random.random_sample(scale_shape).astype(np.float32) - bias_val = np.random.random_sample(scale_shape).astype(np.float32) - - mean = np.zeros(scale_shape).astype(np.float32) - variance = np.ones(scale_shape).astype(np.float32) - - # run forward - y_out, saved_mean, var_ref = _reference_training( - x_val, scale_val, bias_val, epsilon, "NHWC") - - # - mean_out = saved_mean * (1. - momentum) + momentum * mean - variance_out = var_ref * (1. - momentum) + momentum * variance - saved_variance = 1. / np.sqrt(var_ref + epsilon) - - # running N, C, H, W case - # should produce the same results - x_shape2 = [n, c, h, w] - x_val2 = np.transpose(x_val, (0, 3, 1, 2)) - y_out2, saved_mean2, var_ref2 = _reference_training( - x_val2, scale_val, bias_val, epsilon, "NCHW") - - self.__assert_close(saved_mean, saved_mean2, "batch mean") - self.__assert_close(var_ref, var_ref2, "batch variance") - - # transfer (N, C, H, W) back to (N, H, W, C) - y_out2_trans = np.transpose(y_out2, (0, 2, 3, 1)) - self.__assert_close(y_out, y_out2_trans, "batch output") - print 'python: NHWC, NCHW, forward checking passed' - - # test backward now - # NHWC - self.y_grad = np.random.random_sample(x_shape).astype(np.float32) - y_grad = self.y_grad - # y_grad = np.ones(x_shape).astype(np.float32) - x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_grad( - x_val, y_grad, scale_val, saved_mean, var_ref, epsilon, "NHWC") - - # NCHW - y_grad2 = np.transpose(y_grad, (0, 3, 1, 2)) - # y_grad2 = np.ones(x_shape2).astype(np.float32) - x_grad_ref2, scale_grad_ref2, bias_grad_ref2 = _reference_grad( - x_val2, y_grad2, scale_val, saved_mean2, var_ref2, epsilon, "NCHW") - - self.__assert_close(scale_grad_ref, scale_grad_ref2, "scale gradient") - self.__assert_close(bias_grad_ref, bias_grad_ref2, "bias gradient") - - x_grad_transpose = np.transpose(x_grad_ref2, (0, 2, 3, 1)) - self.__assert_close(x_grad_ref, x_grad_transpose, "x gradient") - print 'python: NHWC, NCHW, backward checking passed' - def test_forward_backward(self): def test_with_place(place, data_layout, shape): # attr epsilon = 0.00001 momentum = 0.9 - - if len(shape) == 2: - x_shape = shape - c = shape[1] + if data_layout == "NCHW": + n, c, h, w = shape[0], shape[1], shape[2], shape[3] else: - # n, h, w, c = 2, 3, 4, 2 n, h, w, c = shape[0], shape[1], shape[2], shape[3] - if data_format == "NHWC": - x_shape = [n, h, w, c] - elif data_format == "NCHW": - x_shape = [n, c, h, w] - else: - raise ValueError("Unknown data type.") scale_shape = [c] - x_val = np.random.random_sample(x_shape).astype(np.float32) - scale_val = np.random.random_sample(scale_shape).astype(np.float32) - bias_val = np.random.random_sample(scale_shape).astype(np.float32) - + np.random.seed(123) + x = np.random.random_sample(shape).astype(np.float32) + scale = np.random.random_sample(scale_shape).astype(np.float32) + bias = np.random.random_sample(scale_shape).astype(np.float32) mean = np.zeros(scale_shape).astype(np.float32) variance = np.ones(scale_shape).astype(np.float32) # run forward - y_out, saved_mean, var_ref = _reference_training( - x_val, scale_val, bias_val, epsilon, data_format) - - # update moving mean and variance + y, saved_mean, var_ref = _reference_training(x, scale, bias, + epsilon, data_layout) mean_out = saved_mean * (1. - momentum) + momentum * mean variance_out = var_ref * (1. - momentum) + momentum * variance saved_variance = 1. / np.sqrt(var_ref + epsilon) - - # for gradient test - # y_grad = np.ones(x_shape).astype(np.float32) - y_grad = np.zeros(x_shape).astype(np.float32) - if len(y_grad.shape) == 2: - y_grad[0, 0] = 1. - else: - y_grad[0, 0, 0, 0] = 1. - # y_grad = np.random.random_sample(x_shape).astype(np.float32) - x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_grad( - x_val, y_grad, scale_val, saved_mean, var_ref, epsilon, - data_format) - - scope = core.Scope() - - # create input - x_tensor = create_or_get_tensor(scope, "x_val", x_val, place) - scale_tensor = create_or_get_tensor(scope, "scale_val", scale_val, - place) - bias_tensor = create_or_get_tensor(scope, "bias_val", bias_val, - place) - mean_tensor = create_or_get_tensor(scope, "mean", mean, place) - variance_tensor = create_or_get_tensor(scope, "variance", variance, - place) - - # create output - y_tensor = create_or_get_tensor(scope, "y_out", None, place) - saved_mean_tensor = create_or_get_tensor(scope, "saved_mean", None, - place) - saved_variance_tensor = create_or_get_tensor( - scope, "saved_variance", None, place) - mean_out_tensor = mean_tensor - variance_out_tensor = variance_tensor - - batch_norm_op = Operator( - "batch_norm", - # inputs - X="x_val", - Scale="scale_val", - Bias="bias_val", - Mean="mean", - Variance="variance", - # outputs - Y="y_out", - MeanOut="mean", - VarianceOut="variance", - SavedMean="saved_mean", - SavedVariance="saved_variance", - # attrs - is_test=False, - data_layout=data_layout, - momentum=momentum, - epsilon=epsilon) - - batch_norm_op.run(scope, place) - - # check forward result - self.__assert_close(y_tensor, y_out, "y_out") - self.__assert_close(saved_mean_tensor, saved_mean, "saved_mean") - self.__assert_close(saved_variance_tensor, saved_variance, - "saved_variance") - self.__assert_close(mean_out_tensor, mean_out, "mean_out") - if isinstance(place, core.CUDAPlace): - atol = 5e-2 - else: - atol = 1e-4 - self.__assert_close(variance_out_tensor, variance_out, - "variance_out", atol) - print "op test forward passed: ", str(place), data_layout - # run backward - batch_norm_op_grad = get_backward_op(scope, batch_norm_op, set()) - set_output_grad( - scope, - ["y_out", "mean", "variance", "saved_mean", "saved_variance"], - place, - feed_dict={"y_out": y_grad}) - batch_norm_op_grad.run(scope, place) - - x_grad_tensor = create_or_get_tensor(scope, - grad_var_name("x_val"), None, - place) - scale_grad_tensor = create_or_get_tensor(scope, - grad_var_name("scale_val"), - None, place) - bias_grad_tensor = create_or_get_tensor(scope, - grad_var_name("bias_val"), - None, place) + y_grad = np.random.random_sample(shape).astype(np.float32) + x_grad, scale_grad, bias_grad = _reference_grad( + x, y_grad, scale, saved_mean, var_ref, epsilon, data_format) + + var_dict = locals() + var_dict['y@GRAD'] = y_grad + + var_names = [ + 'x', 'scale', 'bias', 'mean', 'variance', 'y', 'saved_mean', + 'saved_variance' + ] + ground_truth = {name: var_dict[name] for name in var_names} + + program = fluid.Program() + with fluid.program_guard(program): + block = program.global_block() + for name in ground_truth: + block.create_var( + name=name, + dtype='float32', + shape=ground_truth[name].shape) + bn_op = block.append_op( + type="batch_norm", + inputs={ + "X": block.var('x'), + "Scale": block.var('scale'), + "Bias": block.var('bias'), + "Mean": block.var('mean'), + "Variance": block.var('variance') + }, + outputs={ + "Y": block.var('y'), + "MeanOut": block.var('mean'), # share the same memory + "VarianceOut": + block.var('variance'), # share the same memory + "SavedMean": block.var('saved_mean'), + "SavedVariance": block.var('saved_variance') + }, + attrs={ + "momentum": momentum, + "epsilon": epsilon, + "is_test": False, + "data_layout": data_layout + }) + block.create_var(name='y@GRAD', dtype='float32', shape=y.shape) + + # generate backward op_desc + grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc( + bn_op.desc, set(), []) + grad_op_desc = grad_op_desc_list[0] + new_op_desc = block.desc.append_op() + new_op_desc.copy_from(grad_op_desc) + for var_name in grad_op_desc.output_arg_names(): + block.desc.var(var_name.encode("ascii")) + grad_op_desc.infer_var_type(block.desc) + grad_op_desc.infer_shape(block.desc) + for arg in grad_op_desc.output_arg_names(): + grad_var = block.desc.find_var(arg.encode("ascii")) + grad_var.set_dtype(core.VarDesc.VarType.FP32) + + exe = fluid.Executor(place) + out = exe.run( + program, + feed={ + name: var_dict[name] + for name in + ['x', 'scale', 'bias', 'mean', 'variance', 'y@GRAD'] + }, + fetch_list=[ + 'y', 'mean', 'variance', 'saved_mean', 'saved_variance', + 'x@GRAD', 'scale@GRAD', 'bias@GRAD' + ]) + + self.__assert_close(y, out[0], "y") + self.__assert_close(mean_out, out[1], "mean") + self.__assert_close(variance_out, out[2], "variance", 1e-3) + self.__assert_close(saved_mean, out[3], "saved_mean") + self.__assert_close(saved_variance, out[4], "saved_variance", 1e-3) + self.__assert_close(x_grad, out[5], "x_grad") + self.__assert_close(scale_grad, out[6], "scale_grad") + self.__assert_close(bias_grad, out[7], "bias_grad") - # check gradient output - self.__assert_close(x_grad_tensor, x_grad_ref, "x_grad") - self.__assert_close(scale_grad_tensor, scale_grad_ref, "scale_grad") - self.__assert_close(bias_grad_tensor, bias_grad_ref, "bias_grad") - print "op test backward passed: ", str(place), data_layout + print "op test forward passed: ", str(place), data_layout places = [core.CPUPlace()] if core.is_compiled_with_cuda() and core.op_support_gpu("batch_norm"): @@ -537,7 +393,6 @@ class TestBatchNormOpTraining(OpTest): for place in places: for data_format in ["NCHW", "NHWC"]: test_with_place(place, data_format, [2, 3, 4, 5]) - test_with_place(place, data_format, [2, 3]) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_cond_op.py b/python/paddle/fluid/tests/unittests/test_cond_op.py deleted file mode 100644 index 66fbae961a2701e79da5222ae2689108335c4065..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/unittests/test_cond_op.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import paddle.fluid.core as core -import unittest -import numpy as np -from paddle.fluid.op import Operator, CondOp - - -class PySimpleCond(object): - ''' - A simple implementation of dynamic if-else based on numpy - ''' - - def __init__(self): - array = [1] * 10 - for i in range(1, 10, 2): - array[i] = 0 - self.cond = np.array(array) - self.x = np.ones(shape=(10, 1)).astype("float32") - - def forward(self): - self.index_t = np.where(self.cond == 1) - self.index_f = np.where(self.cond == 0) - y_t = self.x[self.index_t] - y_f = self.x[self.index_f] - y_t = y_t * 2. - y_f = y_f * (-2.) - output = np.zeros(shape=(10, 1)) - output[self.index_t] = y_t - output[self.index_f] = y_f - return output - - -class PySimpleCondTest(unittest.TestCase): - def setUp(self): - self.condnn = PySimpleCond() - - def test_forward(self): - output = self.condnn.forward() - - -def create_tensor(scope, name, shape, np_data): - tensor = scope.var(name).get_tensor() - tensor.set_dims(shape) - tensor.set(np_data, core.CPUPlace()) - return tensor - - -class TestCondOp(unittest.TestCase): - ''' - Test CondOp - - equation: - cond = [True, False, True, False, ...] - y[index_t] = x[index_t] * 2. - y[index_f] = x[index_f] * -2. - outputs: - y - ''' - - def setUp(self): - self.py_cond = PySimpleCond() - - def forward(self): - self.scope = core.Scope() - self.create_global_variables() - self.create_cond_op() - self.create_sub_net() - self.condop.run(self.scope, core.CPUPlace()) - return np.array(self.scope.find_var("Out").get_tensor()) - - def create_global_variables(self): - x_np_data = self.py_cond.x - create_tensor(self.scope, "X", [10, 1], x_np_data) - cond_np_data = self.py_cond.cond.astype("int32") - create_tensor(self.scope, "cond", [10, 1], cond_np_data) - self.scope.var("SubScopes") - self.scope.var("IndexTensors") - self.scope.var("Out") - - def create_cond_op(self): - self.condop = CondOp( - Cond="cond", - Xs=["X"], - Outs=["Out"], - SubScopes="SubScopes", - IndexTensors="IndexTensors") - - def create_sub_net(self): - truenet = core.Net.create() - scale_op_t = Operator("scale", X='X', Out='Out', scale=2.) - truenet.append_op(scale_op_t) - truenet.complete_add_op(True) - self.condop.set_truenet(truenet) - - falsenet = core.Net.create() - scale_op_t = Operator("scale", X='X', Out='Out', scale=-2.) - falsenet.append_op(scale_op_t) - falsenet.complete_add_op(True) - self.condop.set_falsenet(falsenet) - - def test_forward(self): - print 'test cond op forward' - pd_output = self.forward() - py_output = self.py_cond.forward() - print 'pd_output', pd_output - print - print 'py_output', py_output - self.assertEqual(pd_output.shape, py_output.shape) - print 'test passed' - return 0 - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_layer_norm_op.py b/python/paddle/fluid/tests/unittests/test_layer_norm_op.py index 8c67e45b7fc997012af5f678f21271ad8b220edc..69365db4d104a1b69916a605534eff83e242289f 100644 --- a/python/paddle/fluid/tests/unittests/test_layer_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_layer_norm_op.py @@ -15,10 +15,8 @@ import unittest import numpy as np from operator import mul -from op_test import OpTest import paddle.fluid.core as core -from paddle.fluid.op import Operator -from paddle.fluid.framework import grad_var_name +import paddle.fluid as fluid np.random.random(123) @@ -70,161 +68,93 @@ def _reference_layer_norm_grad(x, grad_y, scale, mean, var, begin_norm_axis=1): return grad_x, d_scale, d_bias -def get_backward_op(scope, op, no_grad_set): - backward_op = core.Operator.backward(op, no_grad_set) - for input in backward_op.input_vars(): - var = scope.var(input) - var.get_tensor() - for output in backward_op.output_vars(): - var = scope.var(output) - var.get_tensor() - return backward_op - - -def create_or_get_tensor(scope, var_name, var, place): - tensor = scope.var(var_name).get_tensor() - if var is not None: - assert isinstance(var, np.ndarray) - tensor.set_lod([[]]) - tensor.set_dims(var.shape) - tensor.set(var, place) - return tensor - - -def set_output_grad(scope, outputs, place, feed_dict=None): - def __set_tensor__(name, data=None): - out_tensor = scope.find_var(name).get_tensor() - grad_tensor = scope.var(grad_var_name(name)).get_tensor() - out_dtype = out_tensor.dtype() - if data is None: - if out_dtype == core.VarDesc.VarType.FP64: - data = np.ones(out_tensor.shape(), dtype=np.float64) - elif out_dtype == core.VarDesc.VarType.FP32: - data = np.ones(out_tensor.shape(), dtype=np.float32) - else: - raise ValueError("Not supported data type " + str(out_dtype)) - grad_tensor.set(data, place) - - for output in outputs: - data = None - if output in feed_dict: - data = feed_dict[output] - __set_tensor__(output, data) - - -class TestLayerNormdOp(OpTest): +class TestLayerNormdOp(unittest.TestCase): def __assert_close(self, tensor, np_array, msg, atol=1e-4): self.assertTrue(np.allclose(np.array(tensor), np_array, atol=atol), msg) - def __assert_grad_close(self, - tensor, - np_array, - name, - place, - max_relative_error=0.02): - a = np.array(tensor) - b = np_array - abs_a = np.abs(a) - abs_a[abs_a < 1e-5] = 1 - - diff_mat = np.abs(a - b) / abs_a - max_diff = np.max(diff_mat) - - def err_msg(): - offset = np.argmax(diff_mat > max_relative_error) - return ("%s Variable %s max gradient diff %f over limit %f, " - "the first error element is %d, %f, %f") % ( - "Gradient Check On %s" % str(place), name, max_diff, - max_relative_error, offset, a.flatten()[offset], - b.flatten()[offset]) - - self.assertLessEqual(max_diff, max_relative_error, err_msg()) - def check_forward_backward(self, shape, begin_norm_axis): - def test_with_place(place, shape, begin_norm_axis=1): - # setUp - assert begin_norm_axis > 0 and begin_norm_axis < len( - shape), 'begin_norm_axis must be between 0 and len(shape)-1.' + def test_with_place(place, shape, begin_norm_axis): # attr epsilon = 0.00001 x_shape = shape D = reduce(mul, x_shape[begin_norm_axis:len(x_shape)], 1) scale_shape = [D] - x_val = np.random.random_sample(x_shape).astype(np.float32) - scale_val = np.random.random_sample(scale_shape).astype(np.float32) - bias_val = np.random.random_sample(scale_shape).astype(np.float32) + np.random.seed(123) + x = np.random.random_sample(x_shape).astype(np.float32) + scale = np.random.random_sample(scale_shape).astype(np.float32) + bias = np.random.random_sample(scale_shape).astype(np.float32) y_grad = np.random.random_sample(x_shape).astype(np.float32) - # run forward - y_out, saved_mean, var_ref = _reference_layer_norm_naive( - x_val, scale_val, bias_val, epsilon, begin_norm_axis) - naive_fw = {"Y": y_out, "Mean": saved_mean, "Variance": var_ref} - - # get gradient - x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_layer_norm_grad( - x_val, y_grad, scale_val, saved_mean, var_ref, begin_norm_axis) - naive_grad = { - "X": x_grad_ref, - "Scale": scale_grad_ref, - "Bias": bias_grad_ref - } - - scope = core.Scope() - - # create input - input_map = {"X": x_val, "Scale": scale_val, "Bias": bias_val} - for i_name in input_map: - create_or_get_tensor(scope, i_name, input_map[i_name], place) - - # create output - output_map = {"Y": None, "Mean": None, "Variance": None} - output_tensor = {} - for o_name in output_map: - output_tensor[o_name] = create_or_get_tensor( - scope, o_name, output_map[o_name], place) - - layer_norm_op = Operator( - "layer_norm", - # inputs - X="X", - Scale="Scale", - Bias="Bias", - # outputs - Y="Y", - Mean="Mean", - Variance="Variance", - # attrs - epsilon=epsilon, - begin_norm_axis=begin_norm_axis) - - layer_norm_op.run(scope, place) - - # check forward result - atol = 5e-2 if isinstance(place, core.CUDAPlace) else 1e-4 - for o_tensor in output_tensor: - self.__assert_close(output_tensor[o_tensor], naive_fw[o_tensor], - o_tensor, atol) - - # run backward - layer_norm_op_grad = get_backward_op(scope, layer_norm_op, set()) - set_output_grad( - scope, ["Y", "Mean", "Variance"], - place, - feed_dict={"Y": y_grad}) - layer_norm_op_grad.run(scope, place) - - # get output - grad_tensor = {} - for o_name in naive_grad: - grad_tensor[o_name] = x_ = create_or_get_tensor( - scope, grad_var_name(o_name), None, place) - - # check gradient output - for o_grad in naive_grad: - self.__assert_grad_close(grad_tensor[o_grad], - naive_grad[o_grad], o_grad + "@GRAD", - place) + # reference forward & backward + y, mean, variance = _reference_layer_norm_naive( + x, scale, bias, epsilon, begin_norm_axis) + x_grad, scale_grad, bias_grad = _reference_layer_norm_grad( + x, y_grad, scale, mean, variance, begin_norm_axis) + + var_dict = locals() + var_dict['y@GRAD'] = y_grad + var_names = [ + 'x', 'scale', 'bias', 'mean', 'variance', 'y', 'y@GRAD' + ] + ground_truth = {name: var_dict[name] for name in var_names} + + program = fluid.Program() + with fluid.program_guard(program): + block = program.global_block() + for name in ground_truth: + block.create_var( + name=name, + dtype='float32', + shape=ground_truth[name].shape) + layer_norm_op = block.append_op( + type="layer_norm", + inputs={ + "X": block.var('x'), + "Scale": block.var('scale'), + "Bias": block.var('bias'), + }, + outputs={ + "Y": block.var('y'), + "Mean": block.var('mean'), # share the same memory + "Variance": + block.var('variance'), # share the same memory + }, + attrs={ + "epsilon": epsilon, + "begin_norm_axis": begin_norm_axis + }) + + # generate backward op_desc + grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc( + layer_norm_op.desc, set(), []) + grad_op_desc = grad_op_desc_list[0] + new_op_desc = block.desc.append_op() + new_op_desc.copy_from(grad_op_desc) + for var_name in grad_op_desc.output_arg_names(): + block.desc.var(var_name.encode("ascii")) + grad_op_desc.infer_var_type(block.desc) + grad_op_desc.infer_shape(block.desc) + for arg in grad_op_desc.output_arg_names(): + grad_var = block.desc.find_var(arg.encode("ascii")) + grad_var.set_dtype(core.VarDesc.VarType.FP32) + + exe = fluid.Executor(place) + out = exe.run(program, + feed={ + name: var_dict[name] + for name in ['x', 'scale', 'bias', 'y@GRAD'] + }, + fetch_list=[ + 'y', 'mean', 'variance', 'x@GRAD', + 'scale@GRAD', 'bias@GRAD' + ]) + self.__assert_close(y, out[0], "y") + self.__assert_close(mean, out[1], "mean") + self.__assert_close(variance, out[2], "variance", 1e-3) + self.__assert_close(x_grad, out[3], "x_grad") + self.__assert_close(scale_grad, out[4], "scale_grad", 1e-3) + self.__assert_close(bias_grad, out[5], "bias_grad") places = [core.CPUPlace()] if core.is_compiled_with_cuda() and core.op_support_gpu("layer_norm"): @@ -237,15 +167,6 @@ class TestLayerNormdOp(OpTest): self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=1) self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=3) - def test_check_forward_backward_with_scale(self): - pass # TODO(zcd) - - def test_check_forward_backward_with_bias(self): - pass # TODO(zcd) - - def test_check_forward_backward(self): - pass # TODO(zcd) - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 2179826d81f715d6d280aea28a76f919330dd644..f88a6f1ce6e953c54da29f9e96199169b2cecd8b 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -32,7 +32,6 @@ class TestBook(unittest.TestCase): cost = layers.square_error_cost(input=y_predict, label=y) avg_cost = layers.mean(cost) self.assertIsNotNone(avg_cost) - program.append_backward(avg_cost) print(str(program)) @@ -94,8 +93,6 @@ class TestBook(unittest.TestCase): cost = layers.cross_entropy(input=predict, label=label) avg_cost = layers.mean(cost) - program.append_backward(avg_cost) - print(str(program)) def test_word_embedding(self): diff --git a/python/paddle/fluid/tests/unittests/test_net.py b/python/paddle/fluid/tests/unittests/test_net.py deleted file mode 100644 index ae1699d647d7c0adab36200fb07bde12085053c1..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/unittests/test_net.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.fluid.core as core -from paddle.fluid.op import Operator -import unittest - - -def fc(X, W, Y): - ret_v = core.Net.create() - - ret_v.append_op(Operator("mul", X="X", Y="W", Out="pre_activation")) - ret_v.append_op(Operator("sigmoid", X="pre_activation", Out=Y)) - ret_v.complete_add_op(True) - return ret_v - - -class TestNet(unittest.TestCase): - def test_net_all(self): - net = core.Net.create() - op1 = Operator("sum", X=["X", "Y"], Out="Out") - net.append_op(op1) - - net2 = core.Net.create() - net2.append_op(fc(X="X", W="w", Y="fc.out")) - net2.complete_add_op(True) - net.append_op(net2) - net.complete_add_op(True) - - expected = ''' -Op(plain_net), inputs:{all[W, X, Y]}, outputs:{all[Out, fc.out, pre_activation]}. - Op(sum), inputs:{X[X, Y]}, outputs:{Out[Out]}. - Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}. - Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}. - Op(mul), inputs:{X[X], Y[W]}, outputs:{Out[pre_activation]}. - Op(sigmoid), inputs:{X[pre_activation]}, outputs:{Out[fc.out]}. -''' - self.assertEqual(expected, "\n" + str(net)) - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py index 3c00f708f08b6637acd731d23a5b9eb4eed12d2a..95845ea4de54ad43754ec5811d28ed52a8a3ae86 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py @@ -473,7 +473,7 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase): loss = simple_fc_net(True) test_program = main.clone(for_test=True) - opt = fluid.optimizer.SGD(learning_rate=0.0001) + opt = fluid.optimizer.SGD(learning_rate=0.001) opt.minimize(loss) batch_size = 32 @@ -500,4 +500,8 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase): train_loss, = train_exe.run([loss.name], feed_dict=feed_dict) train_loss = numpy.array(train_loss) - self.assertTrue(numpy.allclose(train_loss, test_loss)) + self.assertTrue( + numpy.allclose( + train_loss, test_loss, atol=1e-8), + "Train loss: " + str(train_loss) + "\n Test loss:" + + str(test_loss)) diff --git a/python/paddle/fluid/tests/unittests/test_program.py b/python/paddle/fluid/tests/unittests/test_program.py index 87a2195f0d5c7fd355ea01a3c8f60908b33d4b9d..c51a48239330621d8e008415f81361616467cabf 100644 --- a/python/paddle/fluid/tests/unittests/test_program.py +++ b/python/paddle/fluid/tests/unittests/test_program.py @@ -87,57 +87,6 @@ class TestProgram(unittest.TestCase): print(prog) print(prog_restored) - def test_append_backward(self): - prog = Program() - block = prog.global_block() - - mul_x = block.create_var( - dtype="float32", shape=[5, 10], lod_level=0, name="mul.x") - mul_y = block.create_var( - dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") - mul_out = block.create_var( - dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") - mul_op = block.append_op( - type="mul", - inputs={"X": [mul_x], - "Y": mul_y}, - outputs={"Out": [mul_out]}, - attrs={"x_num_col_dims": 1}) - - add_y = block.create_var( - dtype="float32", shape=[5, 8], lod_level=0, name="add.y") - add_out = block.create_var( - dtype="float32", shape=[5, 8], lod_level=0, name="add.out") - add_op = block.append_op( - type="elementwise_add", - inputs={"X": mul_out, - "Y": add_y}, - outputs={"Out": add_out}, - attrs={"x_num_col_dims": 1}) - mean_out = block.create_var( - dtype="float32", shape=[1], lod_level=0, name="mean.out") - block.append_op( - type="mean", inputs={"X": add_out}, outputs={"Out": mean_out}) - - self.assertEqual(mul_op.idx, 0) - self.assertEqual(add_op.idx, 1) - param_to_grad = prog.append_backward(mean_out, set()) - - for var_name in ("mul.x", "mul.y", "mul.out", "add.y", "add.out", - "mean.out"): - self.assertEqual(param_to_grad[var_name][0], - grad_var_name(var_name)) - self.assertEqual(param_to_grad[var_name][1], 0) - - expect_ops = [ - "mul", "elementwise_add", "mean", "fill_constant", "mean_grad", - "elementwise_add_grad", "mul_grad" - ] - actual_ops = [] - for op in block.ops: - actual_ops.append(op.type) - self.assertEqual(actual_ops, expect_ops) - def test_program_clone_with_parameter(self): main_program = Program() startup_program = Program()